# load required packages
library(dplyr)
library(wooldridge)
library(rcompanion)
library(readxl)
library(tidyr)
library(ggplot2) 
library(car)
library(sandwich)
library(lmtest)
library(caret)
library(pca3d)
library(rms)
library(leaps)
library(psych)
library(animation)
library(factoextra)
library(lubridate) 
library(AER)    # applied econometrics with R
library(plm)    # panel-lm
library(stargazer)      # popular package for regression table-making
library(lattice)
library(stringr)

Part A: Data preparation

Datasets required: charts.csv, charts_no_duplicate.csv and df.csv

In this section, we prepare the data sets in R. This is a necessary step before we can proceed with the data analysis.

# Load the three input CSV files (each has a header row) from the working
# directory. Note that `rank` shadows base::rank() for the rest of the script.
rank <- read.csv("charts.csv", header = TRUE)
rank_no_dup <- read.csv("charts_no_duplicate.csv", header = TRUE)
music_style <- read.csv("df.csv", header = TRUE)

Summary & Structure of the data set

# Per-column summary of the weekly chart rows (the output below shows that
# last.week contains NA values).
summary(rank)
##      date                rank           song              artist         
##  Length:330087      Min.   :  1.0   Length:330087      Length:330087     
##  Class :character   1st Qu.: 26.0   Class :character   Class :character  
##  Mode  :character   Median : 51.0   Mode  :character   Mode  :character  
##                     Mean   : 50.5                                        
##                     3rd Qu.: 76.0                                        
##                     Max.   :100.0                                        
##                                                                          
##    last.week        peak.rank      weeks.on.board  
##  Min.   :  1.00   Min.   :  1.00   Min.   : 1.000  
##  1st Qu.: 23.00   1st Qu.: 13.00   1st Qu.: 4.000  
##  Median : 47.00   Median : 38.00   Median : 7.000  
##  Mean   : 47.59   Mean   : 40.97   Mean   : 9.162  
##  3rd Qu.: 72.00   3rd Qu.: 65.00   3rd Qu.:13.000  
##  Max.   :100.00   Max.   :100.00   Max.   :90.000  
##  NA's   :32312
# Per-column summary of the de-duplicated song/artist table with its `id` key.
summary(rank_no_dup)
##      song              artist                id      
##  Length:9197        Length:9197        Min.   :   1  
##  Class :character   Class :character   1st Qu.:2300  
##  Mode  :character   Mode  :character   Median :4599  
##                                        Mean   :4599  
##                                        3rd Qu.:6898  
##                                        Max.   :9197
# Per-column summary of the song attribute table (the output below shows one NA
# in each numeric attribute column).
summary(music_style)
##        X            song              album              artist         
##  Min.   :   0   Length:9197        Length:9197        Length:9197       
##  1st Qu.:2299   Class :character   Class :character   Class :character  
##  Median :4598   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :4598                                                           
##  3rd Qu.:6897                                                           
##  Max.   :9196                                                           
##                                                                         
##  release_date           length         popularity     danceability   
##  Length:9197        Min.   : 34306   Min.   : 0.00   Min.   :0.0768  
##  Class :character   1st Qu.:191920   1st Qu.:53.00   1st Qu.:0.5340  
##  Mode  :character   Median :217553   Median :65.00   Median :0.6400  
##                     Mean   :221931   Mean   :61.45   Mean   :0.6356  
##                     3rd Qu.:245746   3rd Qu.:73.00   3rd Qu.:0.7460  
##                     Max.   :992160   Max.   :98.00   Max.   :0.9810  
##                     NA's   :1        NA's   :1       NA's   :1       
##   acousticness           energy        instrumentalness       liveness     
##  Min.   :0.0000023   Min.   :0.00379   Min.   :0.0000000   Min.   :0.0193  
##  1st Qu.:0.0216750   1st Qu.:0.55400   1st Qu.:0.0000000   1st Qu.:0.0961  
##  Median :0.0908500   Median :0.68700   Median :0.0000000   Median :0.1270  
##  Mean   :0.1872813   Mean   :0.66874   Mean   :0.0138394   Mean   :0.1831  
##  3rd Qu.:0.2652500   3rd Qu.:0.80700   3rd Qu.:0.0000204   3rd Qu.:0.2310  
##  Max.   :0.9950000   Max.   :0.99900   Max.   :0.9850000   Max.   :0.9850  
##  NA's   :1           NA's   :1         NA's   :1           NA's   :1       
##     loudness        speechiness         tempo        time_signature 
##  Min.   :-42.887   Min.   :0.0224   Min.   : 46.17   Min.   :1.000  
##  1st Qu.: -7.346   1st Qu.:0.0365   1st Qu.: 97.86   1st Qu.:4.000  
##  Median : -5.791   Median :0.0575   Median :121.54   Median :4.000  
##  Mean   : -6.233   Mean   :0.1125   Mean   :122.34   Mean   :3.964  
##  3rd Qu.: -4.576   3rd Qu.:0.1460   3rd Qu.:142.37   3rd Qu.:4.000  
##  Max.   : -0.463   Max.   :0.9610   Max.   :211.89   Max.   :5.000  
##  NA's   :1         NA's   :1        NA's   :1        NA's   :1      
##        id      
##  Min.   :   1  
##  1st Qu.:2300  
##  Median :4599  
##  Mean   :4599  
##  3rd Qu.:6898  
##  Max.   :9197  
## 
# Display the structure (column names, types and first values) of each data frame.

# 'rank' is a data frame with 330087 observations and 7 variables.
str(rank)
## 'data.frame':    330087 obs. of  7 variables:
##  $ date          : chr  "6/11/21" "6/11/21" "6/11/21" "6/11/21" ...
##  $ rank          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ song          : chr  "Easy On Me" "Stay" "Industry Baby" "Fancy Like" ...
##  $ artist        : chr  "Adele" "The Kid LAROI & Justin Bieber" "Lil Nas X & Jack Harlow" "Walker Hayes" ...
##  $ last.week     : int  1 2 3 4 5 6 9 7 11 8 ...
##  $ peak.rank     : int  1 1 1 3 2 1 7 1 9 2 ...
##  $ weeks.on.board: int  3 16 14 19 18 8 7 24 20 56 ...
# 'rank_no_dup' is a data frame with 9197 observations and 3 variables.
str(rank_no_dup)
## 'data.frame':    9197 obs. of  3 variables:
##  $ song  : chr  "Easy On Me" "Stay" "Industry Baby" "Fancy Like" ...
##  $ artist: chr  "Adele" "The Kid LAROI & Justin Bieber" "Lil Nas X & Jack Harlow" "Walker Hayes" ...
##  $ id    : int  1 2 3 4 5 6 7 8 9 10 ...
# 'music_style' is a data frame with 9197 observations and 17 variables.
str(music_style)
## 'data.frame':    9197 obs. of  17 variables:
##  $ X               : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ song            : chr  "Easy On Me" "STAY (with Justin Bieber)" "INDUSTRY BABY (feat. Jack Harlow)" "Fancy Like" ...
##  $ album           : chr  "Easy On Me" "STAY (with Justin Bieber)" "INDUSTRY BABY (feat. Jack Harlow)" "Country Stuff" ...
##  $ artist          : chr  "Adele" "The Kid LAROI" "Lil Nas X" "Walker Hayes" ...
##  $ release_date    : chr  "14/10/21" "9/7/21" "23/7/21" "4/6/21" ...
##  $ length          : num  224694 141805 212000 161853 230746 ...
##  $ popularity      : num  95 97 95 82 96 88 96 95 93 90 ...
##  $ danceability    : num  0.604 0.591 0.736 0.649 0.807 0.803 0.788 0.563 0.664 0.702 ...
##  $ acousticness    : num  0.578 0.0383 0.0203 0.114 0.0451 0.000619 0.281 0.335 0.304 0.00883 ...
##  $ energy          : num  0.366 0.764 0.704 0.76 0.893 0.597 0.859 0.664 0.609 0.825 ...
##  $ instrumentalness: num  0.00 0.00 0.00 0.00 2.79e-05 4.50e-06 0.00 0.00 0.00 0.00 ...
##  $ liveness        : num  0.133 0.103 0.0501 0.317 0.366 0.323 0.0424 0.0849 0.0926 0.0674 ...
##  $ loudness        : num  -7.52 -5.48 -7.41 -6.49 -3.75 ...
##  $ speechiness     : num  0.0282 0.0483 0.0615 0.0574 0.0347 0.141 0.0856 0.154 0.0707 0.0601 ...
##  $ tempo           : num  142 170 150 80 126 ...
##  $ time_signature  : num  4 4 4 4 4 4 4 4 4 4 ...
##  $ id              : int  1 2 3 4 5 6 7 8 9 10 ...

Merging of main_df

#colnames(music_style)[1]<- "song"
# Attach the per-song `id` from rank_no_dup to every weekly chart row, matching
# on the (song, artist) pair.
main_df_rank <- full_join(rank, rank_no_dup, by = c("song", "artist"))

# Parse the chart date strings (day/month/two-digit year, e.g. "6/11/21").
main_df_rank$date <- as.Date(main_df_rank$date, format = "%d/%m/%y")

# "%y" prefixes two-digit years 00-68 with 20 (2004/2008 POSIX behaviour), so
# e.g. 1968 is read as 2068. Any date that lands in the future is therefore
# moved back to the 1900s. The repaired string is written in ISO order and
# parsed with an explicit format: the previous "%d/%m/19%y" string could not be
# parsed by the format-less as.Date() call and silently produced NA or garbage.
main_df_rank$date <- as.Date(
  ifelse(
    main_df_rank$date > Sys.Date(),
    format(main_df_rank$date, "19%y-%m-%d"),
    format(main_df_rank$date)
  ),
  format = "%Y-%m-%d"
)

# Sort newest chart first (then by chart position) and keep charts from 2000 on.
# Bare column names are used so that every verb sees the rows currently in the
# pipe; `main_df_rank$date` inside filter() referred to the un-arranged global
# data frame and could therefore mask the wrong rows.
main_df_rank <- main_df_rank %>%
  arrange(desc(date), rank) %>%
  filter(date >= as.Date("2000-01-01"))

# Drop exact duplicate rows.
main_df_rank <- distinct(main_df_rank)


########### df.csv need to update, first colunm is 'song', removed by code#####
# Drop the row-index column `X` that read.csv picked up from df.csv.
music_style <- subset(music_style, select = -c(X))

# Parse the release date strings (day/month/two-digit year, e.g. "14/10/21").
music_style$release_date <- as.Date(music_style$release_date, format = "%d/%m/%y")

# "%y" prefixes two-digit years 00-68 with 20 (2004/2008 POSIX behaviour), so
# e.g. 1968 is read as 2068. Any release date that lands in the future is moved
# back to the 1900s. The repaired string is written in ISO order and parsed
# with an explicit format: the previous "%d/%m/19%y" string could not be parsed
# by the format-less as.Date() call, so those release dates were lost.
music_style$release_date <- as.Date(
  ifelse(
    music_style$release_date > Sys.Date(),
    format(music_style$release_date, "19%y-%m-%d"),
    format(music_style$release_date)
  ),
  format = "%Y-%m-%d"
)

# Join the weekly chart rows to the song attributes on the shared `id` key.
# Both tables carry `song` and `artist`, so the result holds suffixed copies
# (.x from the chart data, .y from the attribute data).
main_df <- full_join(main_df_rank, music_style, by = c("id"))

Manipulate main_df to get required data set for analysis

# Derived columns used by the analysis:
#   loudness_sqr      - squared loudness
#   length_sec        - length divided by 1000 (presumably ms -> s; confirm units)
#   ever_on_billboard - 0 when last.week is missing, otherwise 1
#   min_8weeks        - 0 when weeks.on.board <= 8, otherwise 1 (including NA)
#   hit_song          - 1 only when both flags above are 1, otherwise 0
main_df <- main_df %>%
  mutate(
    loudness_sqr = loudness^2,
    length_sec = length / (10^3),
    ever_on_billboard = if_else(is.na(last.week), 0, 1),
    min_8weeks = case_when(weeks.on.board <= 8 ~ 0, TRUE ~ 1),
    hit_song = as.numeric(ever_on_billboard == 1 & min_8weeks == 1)
  )

# Keep only the chart position, the song key, the music-style attributes and
# the derived flags.
main_df_selected <- main_df %>%
  select(
    date, rank, id,
    length, length_sec, popularity, danceability, acousticness, energy,
    instrumentalness, liveness, loudness, loudness_sqr, speechiness, tempo,
    time_signature,
    ever_on_billboard, min_8weeks, hit_song
  )


# One row per song with its best (numerically lowest) chart position.
# best_rank is constant within a song, so which.min() keeps the first row of
# each group. The result is still grouped by id.
main_df_bestRank <- main_df_selected %>%
  group_by(id) %>%
  mutate(best_rank = min(rank, na.rm = TRUE)) %>%
  slice(which.min(best_rank))

# One row per song with its mean chart position. A song whose ranks are all
# missing gets NaN, for which which.min() is empty, so that song is dropped.
main_df_meanRank <- main_df_selected %>%
  group_by(id) %>%
  mutate(mean_rank = mean(rank, na.rm = TRUE)) %>%
  slice(which.min(mean_rank))

# One row per song carrying both the best and the mean chart position.
main_df_rankInfo <- main_df_selected %>%
  group_by(id) %>%
  mutate(
    best_rank = min(rank, na.rm = TRUE),
    mean_rank = mean(rank, na.rm = TRUE)
  ) %>%
  slice(which.min(best_rank)) %>%
  slice(which.min(mean_rank))

# Chart rows for the years of interest: 2008, 2019 and 2020.
main_df_selected_08 <- main_df_selected %>%
  filter(date >= "2008-01-01", date <= "2008-12-31")

main_df_selected_19 <- main_df_selected %>%
  filter(date >= "2019-01-01", date <= "2019-12-31")

main_df_selected_20 <- main_df_selected %>%
  filter(date >= "2020-01-01", date <= "2020-12-31")

# Chart rows flagged as hit songs.
main_df_hit_song <- main_df_selected %>%
  filter(hit_song == 1)

# Per-song rank summary restricted to rows flagged as hit songs.
main_df_hit_song_rank <- subset(main_df_rankInfo, hit_song > 0)


# Check whether each data frame forms a balanced panel.
# NOTE(review): no `index` is supplied, so plm presumably falls back to treating
# the first two columns as the individual and time identifiers - confirm that
# this is the intended panel structure for these data frames.
is.pbalanced(main_df)
## [1] TRUE
is.pbalanced(main_df_rankInfo)
## [1] FALSE
is.pbalanced(main_df_selected)  
## [1] TRUE

Part B: Descriptive Analytics

# Scatter plots of every song attribute against popularity, then against mean
# rank, for the hit songs. Axis labels are spelled out so they match the labels
# plot() derives from `data$column` arguments.
song_attributes <- c(
  "length_sec", "danceability", "acousticness", "energy", "instrumentalness",
  "liveness", "loudness_sqr", "speechiness", "tempo", "time_signature"
)

for (outcome in c("popularity", "mean_rank")) {
  for (attribute in song_attributes) {
    plot(
      main_df_hit_song_rank[[attribute]],
      main_df_hit_song_rank[[outcome]],
      xlab = paste0("main_df_hit_song_rank$", attribute),
      ylab = paste0("main_df_hit_song_rank$", outcome)
    )
  }
}

Get Mean of Attributes for each Year

# Yearly mean of every song attribute. Rows with any missing value are dropped
# first. The result columns are named `mean(<attribute>)`, which the plotting
# code relies on.
attri_by_yr <- main_df %>%
  mutate(year = as.numeric(format(date, "%Y"))) %>%
  select(
    year, length_sec, popularity, danceability, acousticness, energy,
    instrumentalness, liveness, loudness_sqr, speechiness, tempo, time_signature
  ) %>%
  na.omit() %>%
  group_by(year) %>%
  summarise(across(everything(), mean, .names = "mean({.col})"))

Plot Mean of Attributes for each Year

# For each attribute, plot its yearly mean against the year and overlay the
# least-squares trend line. Names are the attribute columns; values are the
# y-axis labels.
yearly_axis_labels <- c(
  length_sec = "Avg Length of Song(sec)",
  popularity = "Avg Popularity",
  danceability = "Avg Danceability",
  acousticness = "Avg Acousticness",
  energy = "Avg Energy",
  instrumentalness = "Avg Instrumentalness",
  liveness = "Avg Liveness",
  loudness_sqr = "Avg Loudness(Square of Db)",
  speechiness = "Avg Speechiness",
  tempo = "Avg Tempo",
  time_signature = "Avg Time Signature"
)

for (attribute in names(yearly_axis_labels)) {
  yearly_mean <- attri_by_yr[[paste0("mean(", attribute, ")")]]
  plot(
    attri_by_yr$year, yearly_mean,
    xlab = "Year", ylab = yearly_axis_labels[[attribute]],
    type = "b", pch = 16
  )
  abline(lm(yearly_mean ~ attri_by_yr$year))
}

# Row-level attribute table tagged with year, month and a "YYYYMM" key, with
# incomplete rows removed. It is the input for the per-year monthly means.
attri_by_mth <- main_df %>%
  mutate(
    yearmth = format(date, "%Y%m"),
    year = as.numeric(format(date, "%Y")),
    month = as.numeric(format(date, "%m"))
  ) %>%
  select(
    year, month, yearmth,
    length_sec, popularity, danceability, acousticness, energy,
    instrumentalness, liveness, loudness_sqr, speechiness, tempo, time_signature
  ) %>%
  na.omit()

# Monthly mean of every song attribute for one calendar year.
# Returns one row per `yearmth` with columns named `mean(<attribute>)`.
monthly_attribute_means <- function(target_year) {
  attri_by_mth %>%
    filter(year == target_year) %>%
    group_by(yearmth) %>%
    summarise(
      across(
        c(
          length_sec, popularity, danceability, acousticness, energy,
          instrumentalness, liveness, loudness_sqr, speechiness, tempo,
          time_signature
        ),
        mean,
        .names = "mean({.col})"
      )
    )
}

# One table per year, 2000-2021; the plotting code refers to these by name.
attri_2000 <- monthly_attribute_means(2000)
attri_2001 <- monthly_attribute_means(2001)
attri_2002 <- monthly_attribute_means(2002)
attri_2003 <- monthly_attribute_means(2003)
attri_2004 <- monthly_attribute_means(2004)
attri_2005 <- monthly_attribute_means(2005)
attri_2006 <- monthly_attribute_means(2006)
attri_2007 <- monthly_attribute_means(2007)
attri_2008 <- monthly_attribute_means(2008)
attri_2009 <- monthly_attribute_means(2009)
attri_2010 <- monthly_attribute_means(2010)
attri_2011 <- monthly_attribute_means(2011)
attri_2012 <- monthly_attribute_means(2012)
attri_2013 <- monthly_attribute_means(2013)
attri_2014 <- monthly_attribute_means(2014)
attri_2015 <- monthly_attribute_means(2015)
attri_2016 <- monthly_attribute_means(2016)
attri_2017 <- monthly_attribute_means(2017)
attri_2018 <- monthly_attribute_means(2018)
attri_2019 <- monthly_attribute_means(2019)
attri_2020 <- monthly_attribute_means(2020)
attri_2021 <- monthly_attribute_means(2021)

Get mean of Attributes for Year 2000

# For each attribute, plot its monthly mean in 2000 and overlay the
# least-squares trend line. Names are the attribute columns; values are the
# y-axis labels.
monthly_axis_labels <- c(
  length_sec = "Avg Length of Song(sec)",
  popularity = "Avg Popularity",
  danceability = "Avg Danceability",
  acousticness = "Avg Acousticness",
  energy = "Avg Energy",
  instrumentalness = "Avg Instrumentalness",
  liveness = "Avg Liveness",
  loudness_sqr = "Avg Loudness(Square of Db)",
  speechiness = "Avg Speechiness",
  tempo = "Avg Tempo",
  time_signature = "Avg Time Signature"
)

# `yearmth` is a character "YYYYMM" key. Regressing on it directly makes lm()
# fit one dummy per month, and abline() then draws a line from the first two of
# those coefficients (hence the former "only using the first two of 12
# regression coefficients" warning). Converting it to a number gives a real
# linear trend on the same x scale the plot uses.
month_index <- as.numeric(attri_2000$yearmth)

for (attribute in names(monthly_axis_labels)) {
  monthly_mean <- attri_2000[[paste0("mean(", attribute, ")")]]
  plot(
    month_index, monthly_mean,
    xlab = "Month", ylab = monthly_axis_labels[[attribute]],
    type = "b", pch = 16
  )
  abline(lm(monthly_mean ~ month_index))
}

Get mean of Attributes for Year 2001

# For each attribute, plot its monthly mean in 2001 and overlay the
# least-squares trend line. Names are the attribute columns; values are the
# y-axis labels.
monthly_axis_labels <- c(
  length_sec = "Avg Length of Song(sec)",
  popularity = "Avg Popularity",
  danceability = "Avg Danceability",
  acousticness = "Avg Acousticness",
  energy = "Avg Energy",
  instrumentalness = "Avg Instrumentalness",
  liveness = "Avg Liveness",
  loudness_sqr = "Avg Loudness(Square of Db)",
  speechiness = "Avg Speechiness",
  tempo = "Avg Tempo",
  time_signature = "Avg Time Signature"
)

# `yearmth` is a character "YYYYMM" key. Regressing on it directly makes lm()
# fit one dummy per month, and abline() then draws a line from the first two of
# those coefficients (hence the former "only using the first two of 12
# regression coefficients" warning). Converting it to a number gives a real
# linear trend on the same x scale the plot uses.
month_index <- as.numeric(attri_2001$yearmth)

for (attribute in names(monthly_axis_labels)) {
  monthly_mean <- attri_2001[[paste0("mean(", attribute, ")")]]
  plot(
    month_index, monthly_mean,
    xlab = "Month", ylab = monthly_axis_labels[[attribute]],
    type = "b", pch = 16
  )
  abline(lm(monthly_mean ~ month_index))
}

Get mean of Attributes for Year 2002

# For each attribute, plot its monthly mean in 2002 and overlay the
# least-squares trend line. Names are the attribute columns; values are the
# y-axis labels.
monthly_axis_labels <- c(
  length_sec = "Avg Length of Song(sec)",
  popularity = "Avg Popularity",
  danceability = "Avg Danceability",
  acousticness = "Avg Acousticness",
  energy = "Avg Energy",
  instrumentalness = "Avg Instrumentalness",
  liveness = "Avg Liveness",
  loudness_sqr = "Avg Loudness(Square of Db)",
  speechiness = "Avg Speechiness",
  tempo = "Avg Tempo",
  time_signature = "Avg Time Signature"
)

# `yearmth` is a character "YYYYMM" key. Regressing on it directly makes lm()
# fit one dummy per month, and abline() then draws a line from the first two of
# those coefficients (hence the former "only using the first two of 12
# regression coefficients" warning). Converting it to a number gives a real
# linear trend on the same x scale the plot uses.
month_index <- as.numeric(attri_2002$yearmth)

for (attribute in names(monthly_axis_labels)) {
  monthly_mean <- attri_2002[[paste0("mean(", attribute, ")")]]
  plot(
    month_index, monthly_mean,
    xlab = "Month", ylab = monthly_axis_labels[[attribute]],
    type = "b", pch = 16
  )
  abline(lm(monthly_mean ~ month_index))
}

Get mean of Attributes for Year 2003

# Monthly trend plots for 2003: one plot per averaged attribute, each with a
# least-squares trend line drawn over the monthly means.
#
# lm(y ~ yearmth) treats `yearmth` as categorical and fits 12 coefficients;
# abline() then uses only the first two of them, so the drawn line is not a
# trend line. Regressing on a numeric version of `yearmth` yields a real
# intercept/slope pair.
# NOTE(review): assumes as.numeric(yearmth) reproduces the x positions that
# plot() uses (factor codes, or numeric-like strings) -- confirm against how
# `yearmth` is built.
attri_2003_ylabs <- c(
  "mean(length_sec)" = "Avg Length of Song(sec)",
  "mean(popularity)" = "Avg Popularity",
  "mean(danceability)" = "Avg Danceability",
  "mean(acousticness)" = "Avg Acousticness",
  "mean(energy)" = "Avg Energy",
  "mean(instrumentalness)" = "Avg Instrumentalness",
  "mean(liveness)" = "Avg Liveness",
  "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
  "mean(speechiness)" = "Avg Speechiness",
  "mean(tempo)" = "Avg Tempo",
  "mean(time_signature)" = "Avg Time Signature"
)
attri_2003_month <- as.numeric(attri_2003$yearmth)
for (attri_col in names(attri_2003_ylabs)) {
  attri_mean <- attri_2003[[attri_col]]
  plot(attri_2003$yearmth, attri_mean, xlab = "Month",
       ylab = attri_2003_ylabs[[attri_col]], type = "b", pch = 16)
  # Simple linear fit: exactly two coefficients, as abline() expects.
  abline(lm(attri_mean ~ attri_2003_month))
}

Get mean of Attributes for Year 2004

# Monthly trend plots for 2004: one plot per averaged attribute, each with a
# least-squares trend line drawn over the monthly means.
#
# lm(y ~ yearmth) treats `yearmth` as categorical and fits 12 coefficients;
# abline() then uses only the first two of them, so the drawn line is not a
# trend line. Regressing on a numeric version of `yearmth` yields a real
# intercept/slope pair.
# NOTE(review): assumes as.numeric(yearmth) reproduces the x positions that
# plot() uses (factor codes, or numeric-like strings) -- confirm against how
# `yearmth` is built.
attri_2004_ylabs <- c(
  "mean(length_sec)" = "Avg Length of Song(sec)",
  "mean(popularity)" = "Avg Popularity",
  "mean(danceability)" = "Avg Danceability",
  "mean(acousticness)" = "Avg Acousticness",
  "mean(energy)" = "Avg Energy",
  "mean(instrumentalness)" = "Avg Instrumentalness",
  "mean(liveness)" = "Avg Liveness",
  "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
  "mean(speechiness)" = "Avg Speechiness",
  "mean(tempo)" = "Avg Tempo",
  "mean(time_signature)" = "Avg Time Signature"
)
attri_2004_month <- as.numeric(attri_2004$yearmth)
for (attri_col in names(attri_2004_ylabs)) {
  attri_mean <- attri_2004[[attri_col]]
  plot(attri_2004$yearmth, attri_mean, xlab = "Month",
       ylab = attri_2004_ylabs[[attri_col]], type = "b", pch = 16)
  # Simple linear fit: exactly two coefficients, as abline() expects.
  abline(lm(attri_mean ~ attri_2004_month))
}

Get mean of Attributes for Year 2005

# Monthly trend plots for 2005: one plot per averaged attribute, each with a
# least-squares trend line drawn over the monthly means.
#
# lm(y ~ yearmth) treats `yearmth` as categorical and fits 12 coefficients;
# abline() then uses only the first two of them, so the drawn line is not a
# trend line. Regressing on a numeric version of `yearmth` yields a real
# intercept/slope pair.
# NOTE(review): assumes as.numeric(yearmth) reproduces the x positions that
# plot() uses (factor codes, or numeric-like strings) -- confirm against how
# `yearmth` is built.
attri_2005_ylabs <- c(
  "mean(length_sec)" = "Avg Length of Song(sec)",
  "mean(popularity)" = "Avg Popularity",
  "mean(danceability)" = "Avg Danceability",
  "mean(acousticness)" = "Avg Acousticness",
  "mean(energy)" = "Avg Energy",
  "mean(instrumentalness)" = "Avg Instrumentalness",
  "mean(liveness)" = "Avg Liveness",
  "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
  "mean(speechiness)" = "Avg Speechiness",
  "mean(tempo)" = "Avg Tempo",
  "mean(time_signature)" = "Avg Time Signature"
)
attri_2005_month <- as.numeric(attri_2005$yearmth)
for (attri_col in names(attri_2005_ylabs)) {
  attri_mean <- attri_2005[[attri_col]]
  plot(attri_2005$yearmth, attri_mean, xlab = "Month",
       ylab = attri_2005_ylabs[[attri_col]], type = "b", pch = 16)
  # Simple linear fit: exactly two coefficients, as abline() expects.
  abline(lm(attri_mean ~ attri_2005_month))
}

Get mean of Attributes for Year 2006

# Monthly trend plots for 2006: one plot per averaged attribute, each with a
# least-squares trend line drawn over the monthly means.
#
# lm(y ~ yearmth) treats `yearmth` as categorical and fits 12 coefficients;
# abline() then uses only the first two of them, so the drawn line is not a
# trend line. Regressing on a numeric version of `yearmth` yields a real
# intercept/slope pair.
# NOTE(review): assumes as.numeric(yearmth) reproduces the x positions that
# plot() uses (factor codes, or numeric-like strings) -- confirm against how
# `yearmth` is built.
attri_2006_ylabs <- c(
  "mean(length_sec)" = "Avg Length of Song(sec)",
  "mean(popularity)" = "Avg Popularity",
  "mean(danceability)" = "Avg Danceability",
  "mean(acousticness)" = "Avg Acousticness",
  "mean(energy)" = "Avg Energy",
  "mean(instrumentalness)" = "Avg Instrumentalness",
  "mean(liveness)" = "Avg Liveness",
  "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
  "mean(speechiness)" = "Avg Speechiness",
  "mean(tempo)" = "Avg Tempo",
  "mean(time_signature)" = "Avg Time Signature"
)
attri_2006_month <- as.numeric(attri_2006$yearmth)
for (attri_col in names(attri_2006_ylabs)) {
  attri_mean <- attri_2006[[attri_col]]
  plot(attri_2006$yearmth, attri_mean, xlab = "Month",
       ylab = attri_2006_ylabs[[attri_col]], type = "b", pch = 16)
  # Simple linear fit: exactly two coefficients, as abline() expects.
  abline(lm(attri_mean ~ attri_2006_month))
}

Get mean of Attributes for Year 2007

# Monthly trend plots for 2007: one plot per averaged attribute, each with a
# least-squares trend line drawn over the monthly means.
#
# lm(y ~ yearmth) treats `yearmth` as categorical and fits 12 coefficients;
# abline() then uses only the first two of them, so the drawn line is not a
# trend line. Regressing on a numeric version of `yearmth` yields a real
# intercept/slope pair.
# NOTE(review): assumes as.numeric(yearmth) reproduces the x positions that
# plot() uses (factor codes, or numeric-like strings) -- confirm against how
# `yearmth` is built.
attri_2007_ylabs <- c(
  "mean(length_sec)" = "Avg Length of Song(sec)",
  "mean(popularity)" = "Avg Popularity",
  "mean(danceability)" = "Avg Danceability",
  "mean(acousticness)" = "Avg Acousticness",
  "mean(energy)" = "Avg Energy",
  "mean(instrumentalness)" = "Avg Instrumentalness",
  "mean(liveness)" = "Avg Liveness",
  "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
  "mean(speechiness)" = "Avg Speechiness",
  "mean(tempo)" = "Avg Tempo",
  "mean(time_signature)" = "Avg Time Signature"
)
attri_2007_month <- as.numeric(attri_2007$yearmth)
for (attri_col in names(attri_2007_ylabs)) {
  attri_mean <- attri_2007[[attri_col]]
  plot(attri_2007$yearmth, attri_mean, xlab = "Month",
       ylab = attri_2007_ylabs[[attri_col]], type = "b", pch = 16)
  # Simple linear fit: exactly two coefficients, as abline() expects.
  abline(lm(attri_mean ~ attri_2007_month))
}

Get mean of Attributes for Year 2008

# Monthly trend plots for 2008: one plot per averaged attribute, each with a
# least-squares trend line drawn over the monthly means.
#
# lm(y ~ yearmth) treats `yearmth` as categorical and fits 12 coefficients;
# abline() then uses only the first two of them, so the drawn line is not a
# trend line. Regressing on a numeric version of `yearmth` yields a real
# intercept/slope pair.
# NOTE(review): assumes as.numeric(yearmth) reproduces the x positions that
# plot() uses (factor codes, or numeric-like strings) -- confirm against how
# `yearmth` is built.
attri_2008_ylabs <- c(
  "mean(length_sec)" = "Avg Length of Song(sec)",
  "mean(popularity)" = "Avg Popularity",
  "mean(danceability)" = "Avg Danceability",
  "mean(acousticness)" = "Avg Acousticness",
  "mean(energy)" = "Avg Energy",
  "mean(instrumentalness)" = "Avg Instrumentalness",
  "mean(liveness)" = "Avg Liveness",
  "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
  "mean(speechiness)" = "Avg Speechiness",
  "mean(tempo)" = "Avg Tempo",
  "mean(time_signature)" = "Avg Time Signature"
)
attri_2008_month <- as.numeric(attri_2008$yearmth)
for (attri_col in names(attri_2008_ylabs)) {
  attri_mean <- attri_2008[[attri_col]]
  plot(attri_2008$yearmth, attri_mean, xlab = "Month",
       ylab = attri_2008_ylabs[[attri_col]], type = "b", pch = 16)
  # Simple linear fit: exactly two coefficients, as abline() expects.
  abline(lm(attri_mean ~ attri_2008_month))
}

Get mean of Attributes for Year 2009

# Monthly trend plots for 2009: one plot per averaged attribute, each with a
# least-squares trend line drawn over the monthly means.
#
# lm(y ~ yearmth) treats `yearmth` as categorical and fits 12 coefficients;
# abline() then uses only the first two of them, so the drawn line is not a
# trend line. Regressing on a numeric version of `yearmth` yields a real
# intercept/slope pair.
# NOTE(review): assumes as.numeric(yearmth) reproduces the x positions that
# plot() uses (factor codes, or numeric-like strings) -- confirm against how
# `yearmth` is built.
attri_2009_ylabs <- c(
  "mean(length_sec)" = "Avg Length of Song(sec)",
  "mean(popularity)" = "Avg Popularity",
  "mean(danceability)" = "Avg Danceability",
  "mean(acousticness)" = "Avg Acousticness",
  "mean(energy)" = "Avg Energy",
  "mean(instrumentalness)" = "Avg Instrumentalness",
  "mean(liveness)" = "Avg Liveness",
  "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
  "mean(speechiness)" = "Avg Speechiness",
  "mean(tempo)" = "Avg Tempo",
  "mean(time_signature)" = "Avg Time Signature"
)
attri_2009_month <- as.numeric(attri_2009$yearmth)
for (attri_col in names(attri_2009_ylabs)) {
  attri_mean <- attri_2009[[attri_col]]
  plot(attri_2009$yearmth, attri_mean, xlab = "Month",
       ylab = attri_2009_ylabs[[attri_col]], type = "b", pch = 16)
  # Simple linear fit: exactly two coefficients, as abline() expects.
  abline(lm(attri_mean ~ attri_2009_month))
}

Get mean of Attributes for Year 2010

# Monthly trend plots for 2010: one plot per averaged attribute, each with a
# least-squares trend line drawn over the monthly means.
#
# lm(y ~ yearmth) treats `yearmth` as categorical and fits 12 coefficients;
# abline() then uses only the first two of them, so the drawn line is not a
# trend line. Regressing on a numeric version of `yearmth` yields a real
# intercept/slope pair.
# NOTE(review): assumes as.numeric(yearmth) reproduces the x positions that
# plot() uses (factor codes, or numeric-like strings) -- confirm against how
# `yearmth` is built.
attri_2010_ylabs <- c(
  "mean(length_sec)" = "Avg Length of Song(sec)",
  "mean(popularity)" = "Avg Popularity",
  "mean(danceability)" = "Avg Danceability",
  "mean(acousticness)" = "Avg Acousticness",
  "mean(energy)" = "Avg Energy",
  "mean(instrumentalness)" = "Avg Instrumentalness",
  "mean(liveness)" = "Avg Liveness",
  "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
  "mean(speechiness)" = "Avg Speechiness",
  "mean(tempo)" = "Avg Tempo",
  "mean(time_signature)" = "Avg Time Signature"
)
attri_2010_month <- as.numeric(attri_2010$yearmth)
for (attri_col in names(attri_2010_ylabs)) {
  attri_mean <- attri_2010[[attri_col]]
  plot(attri_2010$yearmth, attri_mean, xlab = "Month",
       ylab = attri_2010_ylabs[[attri_col]], type = "b", pch = 16)
  # Simple linear fit: exactly two coefficients, as abline() expects.
  abline(lm(attri_mean ~ attri_2010_month))
}

Get mean of Attributes for Year 2011

# Monthly trend plots for 2011: one plot per averaged attribute, each with a
# least-squares trend line drawn over the monthly means.
#
# lm(y ~ yearmth) treats `yearmth` as categorical and fits 12 coefficients;
# abline() then uses only the first two of them, so the drawn line is not a
# trend line. Regressing on a numeric version of `yearmth` yields a real
# intercept/slope pair.
# NOTE(review): assumes as.numeric(yearmth) reproduces the x positions that
# plot() uses (factor codes, or numeric-like strings) -- confirm against how
# `yearmth` is built.
attri_2011_ylabs <- c(
  "mean(length_sec)" = "Avg Length of Song(sec)",
  "mean(popularity)" = "Avg Popularity",
  "mean(danceability)" = "Avg Danceability",
  "mean(acousticness)" = "Avg Acousticness",
  "mean(energy)" = "Avg Energy",
  "mean(instrumentalness)" = "Avg Instrumentalness",
  "mean(liveness)" = "Avg Liveness",
  "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
  "mean(speechiness)" = "Avg Speechiness",
  "mean(tempo)" = "Avg Tempo",
  "mean(time_signature)" = "Avg Time Signature"
)
attri_2011_month <- as.numeric(attri_2011$yearmth)
for (attri_col in names(attri_2011_ylabs)) {
  attri_mean <- attri_2011[[attri_col]]
  plot(attri_2011$yearmth, attri_mean, xlab = "Month",
       ylab = attri_2011_ylabs[[attri_col]], type = "b", pch = 16)
  # Simple linear fit: exactly two coefficients, as abline() expects.
  abline(lm(attri_mean ~ attri_2011_month))
}

Plot the monthly mean of each attribute for Year 2012

# Plot each monthly attribute average for 2012 with a linear trend line.
# Regressing on `yearmth` directly treated it as categorical (one coefficient
# per month), so abline() drew a line from only the first two coefficients and
# warned. The trend is now fitted on the numeric month position.
local({
  # Summary column -> y-axis label, in plotting order.
  attri_labels <- c(
    "mean(length_sec)" = "Avg Length of Song(sec)",
    "mean(popularity)" = "Avg Popularity",
    "mean(danceability)" = "Avg Danceability",
    "mean(acousticness)" = "Avg Acousticness",
    "mean(energy)" = "Avg Energy",
    "mean(instrumentalness)" = "Avg Instrumentalness",
    "mean(liveness)" = "Avg Liveness",
    "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
    "mean(speechiness)" = "Avg Speechiness",
    "mean(tempo)" = "Avg Tempo",
    "mean(time_signature)" = "Avg Time Signature"
  )
  # NOTE(review): assumes `yearmth` is a factor, so points are drawn at the
  # level positions 1, 2, ... -- confirm against where `yearmth` is created.
  month_pos <- as.integer(as.factor(attri_2012$yearmth))
  for (attri_col in names(attri_labels)) {
    attri_avg <- attri_2012[[attri_col]]
    plot(attri_2012$yearmth, attri_avg,
         xlab = "Month", ylab = attri_labels[[attri_col]],
         type = "b", pch = 16)
    # Two-coefficient fit (intercept + slope): a genuine straight trend line.
    abline(lm(attri_avg ~ month_pos))
  }
})

Plot the monthly mean of each attribute for Year 2013

# Plot each monthly attribute average for 2013 with a linear trend line.
# Regressing on `yearmth` directly treated it as categorical (one coefficient
# per month), so abline() drew a line from only the first two coefficients and
# warned. The trend is now fitted on the numeric month position.
local({
  # Summary column -> y-axis label, in plotting order.
  attri_labels <- c(
    "mean(length_sec)" = "Avg Length of Song(sec)",
    "mean(popularity)" = "Avg Popularity",
    "mean(danceability)" = "Avg Danceability",
    "mean(acousticness)" = "Avg Acousticness",
    "mean(energy)" = "Avg Energy",
    "mean(instrumentalness)" = "Avg Instrumentalness",
    "mean(liveness)" = "Avg Liveness",
    "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
    "mean(speechiness)" = "Avg Speechiness",
    "mean(tempo)" = "Avg Tempo",
    "mean(time_signature)" = "Avg Time Signature"
  )
  # NOTE(review): assumes `yearmth` is a factor, so points are drawn at the
  # level positions 1, 2, ... -- confirm against where `yearmth` is created.
  month_pos <- as.integer(as.factor(attri_2013$yearmth))
  for (attri_col in names(attri_labels)) {
    attri_avg <- attri_2013[[attri_col]]
    plot(attri_2013$yearmth, attri_avg,
         xlab = "Month", ylab = attri_labels[[attri_col]],
         type = "b", pch = 16)
    # Two-coefficient fit (intercept + slope): a genuine straight trend line.
    abline(lm(attri_avg ~ month_pos))
  }
})

Plot the monthly mean of each attribute for Year 2014

# Plot each monthly attribute average for 2014 with a linear trend line.
# Regressing on `yearmth` directly treated it as categorical (one coefficient
# per month), so abline() drew a line from only the first two coefficients and
# warned. The trend is now fitted on the numeric month position.
local({
  # Summary column -> y-axis label, in plotting order.
  attri_labels <- c(
    "mean(length_sec)" = "Avg Length of Song(sec)",
    "mean(popularity)" = "Avg Popularity",
    "mean(danceability)" = "Avg Danceability",
    "mean(acousticness)" = "Avg Acousticness",
    "mean(energy)" = "Avg Energy",
    "mean(instrumentalness)" = "Avg Instrumentalness",
    "mean(liveness)" = "Avg Liveness",
    "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
    "mean(speechiness)" = "Avg Speechiness",
    "mean(tempo)" = "Avg Tempo",
    "mean(time_signature)" = "Avg Time Signature"
  )
  # NOTE(review): assumes `yearmth` is a factor, so points are drawn at the
  # level positions 1, 2, ... -- confirm against where `yearmth` is created.
  month_pos <- as.integer(as.factor(attri_2014$yearmth))
  for (attri_col in names(attri_labels)) {
    attri_avg <- attri_2014[[attri_col]]
    plot(attri_2014$yearmth, attri_avg,
         xlab = "Month", ylab = attri_labels[[attri_col]],
         type = "b", pch = 16)
    # Two-coefficient fit (intercept + slope): a genuine straight trend line.
    abline(lm(attri_avg ~ month_pos))
  }
})

Plot the monthly mean of each attribute for Year 2015

# Plot each monthly attribute average for 2015 with a linear trend line.
# Regressing on `yearmth` directly treated it as categorical (one coefficient
# per month), so abline() drew a line from only the first two coefficients and
# warned. The trend is now fitted on the numeric month position.
local({
  # Summary column -> y-axis label, in plotting order.
  attri_labels <- c(
    "mean(length_sec)" = "Avg Length of Song(sec)",
    "mean(popularity)" = "Avg Popularity",
    "mean(danceability)" = "Avg Danceability",
    "mean(acousticness)" = "Avg Acousticness",
    "mean(energy)" = "Avg Energy",
    "mean(instrumentalness)" = "Avg Instrumentalness",
    "mean(liveness)" = "Avg Liveness",
    "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
    "mean(speechiness)" = "Avg Speechiness",
    "mean(tempo)" = "Avg Tempo",
    "mean(time_signature)" = "Avg Time Signature"
  )
  # NOTE(review): assumes `yearmth` is a factor, so points are drawn at the
  # level positions 1, 2, ... -- confirm against where `yearmth` is created.
  month_pos <- as.integer(as.factor(attri_2015$yearmth))
  for (attri_col in names(attri_labels)) {
    attri_avg <- attri_2015[[attri_col]]
    plot(attri_2015$yearmth, attri_avg,
         xlab = "Month", ylab = attri_labels[[attri_col]],
         type = "b", pch = 16)
    # Two-coefficient fit (intercept + slope): a genuine straight trend line.
    abline(lm(attri_avg ~ month_pos))
  }
})

Plot the monthly mean of each attribute for Year 2016

# Plot each monthly attribute average for 2016 with a linear trend line.
# Regressing on `yearmth` directly treated it as categorical (one coefficient
# per month), so abline() drew a line from only the first two coefficients and
# warned. The trend is now fitted on the numeric month position.
local({
  # Summary column -> y-axis label, in plotting order.
  attri_labels <- c(
    "mean(length_sec)" = "Avg Length of Song(sec)",
    "mean(popularity)" = "Avg Popularity",
    "mean(danceability)" = "Avg Danceability",
    "mean(acousticness)" = "Avg Acousticness",
    "mean(energy)" = "Avg Energy",
    "mean(instrumentalness)" = "Avg Instrumentalness",
    "mean(liveness)" = "Avg Liveness",
    "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
    "mean(speechiness)" = "Avg Speechiness",
    "mean(tempo)" = "Avg Tempo",
    "mean(time_signature)" = "Avg Time Signature"
  )
  # NOTE(review): assumes `yearmth` is a factor, so points are drawn at the
  # level positions 1, 2, ... -- confirm against where `yearmth` is created.
  month_pos <- as.integer(as.factor(attri_2016$yearmth))
  for (attri_col in names(attri_labels)) {
    attri_avg <- attri_2016[[attri_col]]
    plot(attri_2016$yearmth, attri_avg,
         xlab = "Month", ylab = attri_labels[[attri_col]],
         type = "b", pch = 16)
    # Two-coefficient fit (intercept + slope): a genuine straight trend line.
    abline(lm(attri_avg ~ month_pos))
  }
})

Plot the monthly mean of each attribute for Year 2017

# Plot each monthly attribute average for 2017 with a linear trend line.
# Regressing on `yearmth` directly treated it as categorical (one coefficient
# per month), so abline() drew a line from only the first two coefficients and
# warned. The trend is now fitted on the numeric month position.
local({
  # Summary column -> y-axis label, in plotting order.
  attri_labels <- c(
    "mean(length_sec)" = "Avg Length of Song(sec)",
    "mean(popularity)" = "Avg Popularity",
    "mean(danceability)" = "Avg Danceability",
    "mean(acousticness)" = "Avg Acousticness",
    "mean(energy)" = "Avg Energy",
    "mean(instrumentalness)" = "Avg Instrumentalness",
    "mean(liveness)" = "Avg Liveness",
    "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
    "mean(speechiness)" = "Avg Speechiness",
    "mean(tempo)" = "Avg Tempo",
    "mean(time_signature)" = "Avg Time Signature"
  )
  # NOTE(review): assumes `yearmth` is a factor, so points are drawn at the
  # level positions 1, 2, ... -- confirm against where `yearmth` is created.
  month_pos <- as.integer(as.factor(attri_2017$yearmth))
  for (attri_col in names(attri_labels)) {
    attri_avg <- attri_2017[[attri_col]]
    plot(attri_2017$yearmth, attri_avg,
         xlab = "Month", ylab = attri_labels[[attri_col]],
         type = "b", pch = 16)
    # Two-coefficient fit (intercept + slope): a genuine straight trend line.
    abline(lm(attri_avg ~ month_pos))
  }
})

Plot the monthly mean of each attribute for Year 2018

# Plot each monthly attribute average for 2018 with a linear trend line.
# Regressing on `yearmth` directly treated it as categorical (one coefficient
# per month), so abline() drew a line from only the first two coefficients and
# warned. The trend is now fitted on the numeric month position.
local({
  # Summary column -> y-axis label, in plotting order.
  attri_labels <- c(
    "mean(length_sec)" = "Avg Length of Song(sec)",
    "mean(popularity)" = "Avg Popularity",
    "mean(danceability)" = "Avg Danceability",
    "mean(acousticness)" = "Avg Acousticness",
    "mean(energy)" = "Avg Energy",
    "mean(instrumentalness)" = "Avg Instrumentalness",
    "mean(liveness)" = "Avg Liveness",
    "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
    "mean(speechiness)" = "Avg Speechiness",
    "mean(tempo)" = "Avg Tempo",
    "mean(time_signature)" = "Avg Time Signature"
  )
  # NOTE(review): assumes `yearmth` is a factor, so points are drawn at the
  # level positions 1, 2, ... -- confirm against where `yearmth` is created.
  month_pos <- as.integer(as.factor(attri_2018$yearmth))
  for (attri_col in names(attri_labels)) {
    attri_avg <- attri_2018[[attri_col]]
    plot(attri_2018$yearmth, attri_avg,
         xlab = "Month", ylab = attri_labels[[attri_col]],
         type = "b", pch = 16)
    # Two-coefficient fit (intercept + slope): a genuine straight trend line.
    abline(lm(attri_avg ~ month_pos))
  }
})

Plot the monthly mean of each attribute for Year 2019

# Plot each monthly attribute average for 2019 with a linear trend line.
# Regressing on `yearmth` directly treated it as categorical (one coefficient
# per month), so abline() drew a line from only the first two coefficients and
# warned. The trend is now fitted on the numeric month position.
local({
  # Summary column -> y-axis label, in plotting order.
  attri_labels <- c(
    "mean(length_sec)" = "Avg Length of Song(sec)",
    "mean(popularity)" = "Avg Popularity",
    "mean(danceability)" = "Avg Danceability",
    "mean(acousticness)" = "Avg Acousticness",
    "mean(energy)" = "Avg Energy",
    "mean(instrumentalness)" = "Avg Instrumentalness",
    "mean(liveness)" = "Avg Liveness",
    "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
    "mean(speechiness)" = "Avg Speechiness",
    "mean(tempo)" = "Avg Tempo",
    "mean(time_signature)" = "Avg Time Signature"
  )
  # NOTE(review): assumes `yearmth` is a factor, so points are drawn at the
  # level positions 1, 2, ... -- confirm against where `yearmth` is created.
  month_pos <- as.integer(as.factor(attri_2019$yearmth))
  for (attri_col in names(attri_labels)) {
    attri_avg <- attri_2019[[attri_col]]
    plot(attri_2019$yearmth, attri_avg,
         xlab = "Month", ylab = attri_labels[[attri_col]],
         type = "b", pch = 16)
    # Two-coefficient fit (intercept + slope): a genuine straight trend line.
    abline(lm(attri_avg ~ month_pos))
  }
})

Plot the monthly mean of each attribute for Year 2020

# Plot each monthly attribute average for 2020 with a linear trend line.
# Regressing on `yearmth` directly treated it as categorical (one coefficient
# per month), so abline() drew a line from only the first two coefficients and
# warned. The trend is now fitted on the numeric month position.
local({
  # Summary column -> y-axis label, in plotting order.
  attri_labels <- c(
    "mean(length_sec)" = "Avg Length of Song(sec)",
    "mean(popularity)" = "Avg Popularity",
    "mean(danceability)" = "Avg Danceability",
    "mean(acousticness)" = "Avg Acousticness",
    "mean(energy)" = "Avg Energy",
    "mean(instrumentalness)" = "Avg Instrumentalness",
    "mean(liveness)" = "Avg Liveness",
    "mean(loudness_sqr)" = "Avg Loudness(Square of Db)",
    "mean(speechiness)" = "Avg Speechiness",
    "mean(tempo)" = "Avg Tempo",
    "mean(time_signature)" = "Avg Time Signature"
  )
  # NOTE(review): assumes `yearmth` is a factor, so points are drawn at the
  # level positions 1, 2, ... -- confirm against where `yearmth` is created.
  month_pos <- as.integer(as.factor(attri_2020$yearmth))
  for (attri_col in names(attri_labels)) {
    attri_avg <- attri_2020[[attri_col]]
    plot(attri_2020$yearmth, attri_avg,
         xlab = "Month", ylab = attri_labels[[attri_col]],
         type = "b", pch = 16)
    # Two-coefficient fit (intercept + slope): a genuine straight trend line.
    abline(lm(attri_avg ~ month_pos))
  }
})

Plot the monthly mean of each attribute for year 2021, with a linear trend line

# Monthly averages for 2021: one plot per attribute, each with a least-squares
# trend line drawn on top.
# local() keeps the loop's helper variables out of the script's workspace.
local({
  # Names are the attributes inside the `mean(<attribute>)` columns of
  # attri_2021; values are the matching y-axis labels.
  y_labels <- c(
    length_sec = "Avg Length of Song(sec)",
    popularity = "Avg Popularity",
    danceability = "Avg Danceability",
    acousticness = "Avg Acousticness",
    energy = "Avg Energy",
    instrumentalness = "Avg Instrumentalness",
    liveness = "Avg Liveness",
    loudness_sqr = "Avg Loudness(Square of Db)",
    speechiness = "Avg Speechiness",
    tempo = "Avg Tempo",
    time_signature = "Avg Time Signature"
  )
  # yearmth is coerced to numeric so lm() estimates one intercept and one
  # slope. Regressing on yearmth as stored produced one coefficient per month,
  # of which abline() uses only the first two, so the line was not a trend.
  month_idx <- as.numeric(attri_2021$yearmth)
  for (attri_name in names(y_labels)) {
    monthly_mean <- attri_2021[[paste0("mean(", attri_name, ")")]]
    plot(attri_2021$yearmth, monthly_mean, xlab = "Month",
         ylab = y_labels[[attri_name]], type = "b", pch = 16)
    abline(lm(monthly_mean ~ month_idx))
  }
})

Plot the monthly mean of length_sec for each year from 2000 to 2020, with a linear trend line

# Average song length per month, one plot per year from 2000 to 2020, each
# with a least-squares trend line drawn on top.
# local() keeps the loop's helper variables out of the script's workspace.
local({
  for (yr in 2000:2020) {
    # The yearly summaries are separate objects named attri_<year>.
    attri_yr <- get(paste0("attri_", yr))
    avg_length <- attri_yr$`mean(length_sec)`
    plot(attri_yr$yearmth, avg_length, xlab = "Month",
         ylab = "Avg Length of Song(sec)", type = "b", pch = 16)
    # yearmth is coerced to numeric so lm() estimates one intercept and one
    # slope. Regressing on yearmth as stored produced one coefficient per
    # month, of which abline() uses only the first two, so the line was not a
    # trend.
    abline(lm(avg_length ~ as.numeric(attri_yr$yearmth)))
  }
})

Plot the monthly mean of popularity for each year from 2000 to 2020, with a linear trend line

# Average popularity per month, one plot per year from 2000 to 2020, each with
# a least-squares trend line drawn on top.
# local() keeps the loop's helper variables out of the script's workspace.
local({
  for (yr in 2000:2020) {
    # The yearly summaries are separate objects named attri_<year>.
    attri_yr <- get(paste0("attri_", yr))
    avg_popularity <- attri_yr$`mean(popularity)`
    plot(attri_yr$yearmth, avg_popularity, xlab = "Month",
         ylab = "Avg Popularity", type = "b", pch = 16)
    # yearmth is coerced to numeric so lm() estimates one intercept and one
    # slope. Regressing on yearmth as stored produced one coefficient per
    # month, of which abline() uses only the first two, so the line was not a
    # trend.
    abline(lm(avg_popularity ~ as.numeric(attri_yr$yearmth)))
  }
})

Plot the monthly mean of danceability for each year from 2000 to 2021, with a linear trend line

# Average danceability per month, one plot per year from 2000 to 2021, each
# with a least-squares trend line drawn on top.
# local() keeps the loop's helper variables out of the script's workspace.
local({
  for (yr in 2000:2021) {
    # The yearly summaries are separate objects named attri_<year>.
    attri_yr <- get(paste0("attri_", yr))
    avg_danceability <- attri_yr$`mean(danceability)`
    plot(attri_yr$yearmth, avg_danceability, xlab = "Month",
         ylab = "Avg Danceability", type = "b", pch = 16)
    # yearmth is coerced to numeric so lm() estimates one intercept and one
    # slope. Regressing on yearmth as stored produced one coefficient per
    # month, of which abline() uses only the first two, so the line was not a
    # trend.
    abline(lm(avg_danceability ~ as.numeric(attri_yr$yearmth)))
  }
})

Plot the monthly mean of acousticness for each year from 2000 to 2020, with a linear trend line

# Average acousticness per month, one plot per year from 2000 to 2020, each
# with a least-squares trend line drawn on top.
# local() keeps the loop's helper variables out of the script's workspace.
local({
  for (yr in 2000:2020) {
    # The yearly summaries are separate objects named attri_<year>.
    attri_yr <- get(paste0("attri_", yr))
    avg_acousticness <- attri_yr$`mean(acousticness)`
    plot(attri_yr$yearmth, avg_acousticness, xlab = "Month",
         ylab = "Avg Acousticness", type = "b", pch = 16)
    # yearmth is coerced to numeric so lm() estimates one intercept and one
    # slope. Regressing on yearmth as stored produced one coefficient per
    # month, of which abline() uses only the first two, so the line was not a
    # trend.
    abline(lm(avg_acousticness ~ as.numeric(attri_yr$yearmth)))
  }
})

Plot the monthly mean of energy for each year, with a linear trend line

# Average energy per month, one plot per year from 2000 to 2009, each with a
# least-squares trend line drawn on top.
# local() keeps the loop's helper variables out of the script's workspace.
local({
  for (yr in 2000:2009) {
    # The yearly summaries are separate objects named attri_<year>.
    attri_yr <- get(paste0("attri_", yr))
    avg_energy <- attri_yr$`mean(energy)`
    plot(attri_yr$yearmth, avg_energy, xlab = "Month",
         ylab = "Avg Energy", type = "b", pch = 16)
    # yearmth is coerced to numeric so lm() estimates one intercept and one
    # slope. Regressing on yearmth as stored produced one coefficient per
    # month, of which abline() uses only the first two, so the line was not a
    # trend.
    abline(lm(avg_energy ~ as.numeric(attri_yr$yearmth)))
  }
})
## Warning in abline(lm(attri_2009$`mean(energy)` ~ attri_2009$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2010$yearmth,attri_2010$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2010$`mean(energy)` ~ attri_2010$yearmth))
## Warning in abline(lm(attri_2010$`mean(energy)` ~ attri_2010$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2011$yearmth,attri_2011$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2011$`mean(energy)` ~ attri_2011$yearmth))
## Warning in abline(lm(attri_2011$`mean(energy)` ~ attri_2011$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2012$yearmth,attri_2012$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2012$`mean(energy)` ~ attri_2012$yearmth))
## Warning in abline(lm(attri_2012$`mean(energy)` ~ attri_2012$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2013$yearmth,attri_2013$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2013$`mean(energy)` ~ attri_2013$yearmth))
## Warning in abline(lm(attri_2013$`mean(energy)` ~ attri_2013$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2014$yearmth,attri_2014$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2014$`mean(energy)` ~ attri_2014$yearmth))
## Warning in abline(lm(attri_2014$`mean(energy)` ~ attri_2014$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2015$yearmth,attri_2015$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2015$`mean(energy)` ~ attri_2015$yearmth))
## Warning in abline(lm(attri_2015$`mean(energy)` ~ attri_2015$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2016$yearmth,attri_2016$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2016$`mean(energy)` ~ attri_2016$yearmth))
## Warning in abline(lm(attri_2016$`mean(energy)` ~ attri_2016$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2017$yearmth,attri_2017$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2017$`mean(energy)` ~ attri_2017$yearmth))
## Warning in abline(lm(attri_2017$`mean(energy)` ~ attri_2017$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2018$yearmth,attri_2018$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2018$`mean(energy)` ~ attri_2018$yearmth))
## Warning in abline(lm(attri_2018$`mean(energy)` ~ attri_2018$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2019$yearmth,attri_2019$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2019$`mean(energy)` ~ attri_2019$yearmth))
## Warning in abline(lm(attri_2019$`mean(energy)` ~ attri_2019$yearmth)): only
## using the first two of 12 regression coefficients

plot(attri_2020$yearmth,attri_2020$`mean(energy)`, xlab ="Month",ylab ="Avg Energy",type = "b", pch=16)
abline(lm(attri_2020$`mean(energy)` ~ attri_2020$yearmth))
## Warning in abline(lm(attri_2020$`mean(energy)` ~ attri_2020$yearmth)): only
## using the first two of 12 regression coefficients

Plot the monthly mean of the instrumentalness attribute for each year

# Plot the monthly mean instrumentalness for every year from 2000 to 2020 and
# overlay a least-squares trend line on each plot.
#
# `yearmth` is categorical: lm() on it returned 12 coefficients and abline()
# silently used only the first two (under the default treatment contrasts that
# is the first month's mean and the month-2 minus month-1 difference), which is
# not a trend. The fit is therefore made against the numeric month position.
# NOTE(review): assumes `yearmth` is a factor, so that its integer codes match
# the x positions plot() uses -- confirm where the attri_* tables are built.
for (yr in 2000:2020) {
  # Look up the per-year summary table (attri_2000, ..., attri_2020).
  attri_yr <- get(paste0("attri_", yr))
  avg_value <- attri_yr$`mean(instrumentalness)`
  month_pos <- as.integer(as.factor(attri_yr$yearmth))

  # The title identifies the year now that the calls are no longer written out.
  plot(
    attri_yr$yearmth, avg_value,
    main = as.character(yr),
    xlab = "Month", ylab = "Avg Instrumentalness", type = "b", pch = 16
  )
  abline(lm(avg_value ~ month_pos))
}

Plot the monthly mean of the liveness attribute for each year

# Plot the monthly mean liveness for every year from 2000 to 2020 and overlay a
# least-squares trend line on each plot.
#
# `yearmth` is categorical: lm() on it returned 12 coefficients and abline()
# silently used only the first two (under the default treatment contrasts that
# is the first month's mean and the month-2 minus month-1 difference), which is
# not a trend. The fit is therefore made against the numeric month position.
# NOTE(review): assumes `yearmth` is a factor, so that its integer codes match
# the x positions plot() uses -- confirm where the attri_* tables are built.
for (yr in 2000:2020) {
  # Look up the per-year summary table (attri_2000, ..., attri_2020).
  attri_yr <- get(paste0("attri_", yr))
  avg_value <- attri_yr$`mean(liveness)`
  month_pos <- as.integer(as.factor(attri_yr$yearmth))

  # The title identifies the year now that the calls are no longer written out.
  plot(
    attri_yr$yearmth, avg_value,
    main = as.character(yr),
    xlab = "Month", ylab = "Avg Liveness", type = "b", pch = 16
  )
  abline(lm(avg_value ~ month_pos))
}

Plot the monthly mean of the loudness_sqr attribute for each year

# Plot the monthly mean of loudness_sqr for every year from 2000 to 2020 and
# overlay a least-squares trend line on each plot.
#
# `yearmth` is categorical: lm() on it returned 12 coefficients and abline()
# silently used only the first two (under the default treatment contrasts that
# is the first month's mean and the month-2 minus month-1 difference), which is
# not a trend. The fit is therefore made against the numeric month position.
# NOTE(review): assumes `yearmth` is a factor, so that its integer codes match
# the x positions plot() uses -- confirm where the attri_* tables are built.
for (yr in 2000:2020) {
  # Look up the per-year summary table (attri_2000, ..., attri_2020).
  attri_yr <- get(paste0("attri_", yr))
  avg_value <- attri_yr$`mean(loudness_sqr)`
  month_pos <- as.integer(as.factor(attri_yr$yearmth))

  # The title identifies the year now that the calls are no longer written out.
  plot(
    attri_yr$yearmth, avg_value,
    main = as.character(yr),
    xlab = "Month", ylab = "Avg Loudness(Square of Db)", type = "b", pch = 16
  )
  abline(lm(avg_value ~ month_pos))
}

Plot the monthly mean of the speechiness attribute for each year

# Plot the monthly mean speechiness for every year from 2000 to 2020 and overlay
# a least-squares trend line on each plot.
#
# `yearmth` is categorical: lm() on it returned 12 coefficients and abline()
# silently used only the first two (under the default treatment contrasts that
# is the first month's mean and the month-2 minus month-1 difference), which is
# not a trend. The fit is therefore made against the numeric month position.
# NOTE(review): assumes `yearmth` is a factor, so that its integer codes match
# the x positions plot() uses -- confirm where the attri_* tables are built.
for (yr in 2000:2020) {
  # Look up the per-year summary table (attri_2000, ..., attri_2020).
  attri_yr <- get(paste0("attri_", yr))
  avg_value <- attri_yr$`mean(speechiness)`
  month_pos <- as.integer(as.factor(attri_yr$yearmth))

  # The title identifies the year now that the calls are no longer written out.
  plot(
    attri_yr$yearmth, avg_value,
    main = as.character(yr),
    xlab = "Month", ylab = "Avg Speechiness", type = "b", pch = 16
  )
  abline(lm(avg_value ~ month_pos))
}

Get mean of tempo Attributes

# Monthly average tempo for 2000-2020, one plot per year, with a linear trend
# line.
#
# `yearmth` is categorical, so lm(y ~ yearmth) fits one coefficient per month
# and abline() can only use the first two of them (the source of the former
# "only using the first two of 12 regression coefficients" warning); the line
# it drew was not a trend. The trend is fitted on the month's position within
# the year instead, and the months are written back onto the x axis.
# NOTE(review): assumes each attri_<year> table holds one row per month in
# chronological order -- confirm against where these tables are built.
for (yr in 2000:2020) {
  attri <- get(paste0("attri_", yr))
  month_idx <- seq_len(nrow(attri))
  avg_tempo <- attri$`mean(tempo)`

  plot(month_idx, avg_tempo,
       xlab = "Month", ylab = "Avg Tempo",
       type = "b", pch = 16, xaxt = "n")
  axis(1, at = month_idx, labels = as.character(attri$yearmth))
  abline(lm(avg_tempo ~ month_idx))
}

Get mean of time_signature Attributes

# Monthly average time signature for 2000-2020, one plot per year, with a
# linear trend line.
#
# `yearmth` is categorical, so lm(y ~ yearmth) fits one coefficient per month
# and abline() can only use the first two of them (the source of the former
# "only using the first two of 12 regression coefficients" warning); the line
# it drew was not a trend. The trend is fitted on the month's position within
# the year instead, and the months are written back onto the x axis.
# NOTE(review): assumes each attri_<year> table holds one row per month in
# chronological order -- confirm against where these tables are built.
for (yr in 2000:2020) {
  attri <- get(paste0("attri_", yr))
  month_idx <- seq_len(nrow(attri))
  avg_time_signature <- attri$`mean(time_signature)`

  plot(month_idx, avg_time_signature,
       xlab = "Month", ylab = "Avg Time Signature",
       type = "b", pch = 16, xaxt = "n")
  axis(1, at = month_idx, labels = as.character(attri$yearmth))
  abline(lm(avg_time_signature ~ month_idx))
}

Get Peak Rank for Song

# For every (artist, song) pair, count its chart rows (`n`) and take the best
# (numerically lowest) `peak.rank` recorded for it.
# `.groups = "drop"` returns an ungrouped table, so later steps do not inherit
# a leftover grouping by artist and summarise() no longer prints its
# "grouped output" message.
peak_rank_for_song <- rank %>%
  select(artist, song, peak.rank) %>%
  group_by(artist, song) %>%
  summarise(n = n(), highest_rank = min(peak.rank), .groups = "drop")
head(peak_rank_for_song)
artist song n highest_rank
’N Sync (God Must Have Spent) A Little More Time On You 22 8
’N Sync Bye Bye Bye 23 4
’N Sync Gone 24 11
’N Sync I Drive Myself Crazy 12 67
’N Sync I Want You Back 24 13
’N Sync It’s Gonna Be Me 25 1

Get Peak Rank for Artist

# Best (lowest) peak rank reached by each artist across all of their songs,
# listed alphabetically by artist.
peak_artiste_rank <- arrange(
  summarise(
    group_by(peak_rank_for_song, artist),
    peak_rank = min(highest_rank)
  ),
  artist
)

head(peak_artiste_rank)
artist peak_rank
’N Sync 1
’N Sync & Gloria Estefan 2
’N Sync Featuring Nelly 5
’Til Tuesday 8
“Groove” Holmes 44
“Little” Jimmy Dickens 15

Get hit songs (rank of songs < 21)

# Songs whose best peak rank is inside the top 20, ordered by song title.
hit_songs <- arrange(
  filter(peak_rank_for_song, highest_rank < 21),
  song
)

head(hit_songs)
artist song n highest_rank
Jay-Z Featuring Beyonce Knowles ’03 Bonnie & Clyde 23 4
Paul Davis ’65 Love Affair 20 6
The Angels ’Til 14 14
John Lennon #9 Dream 12 9
Mariah Carey Featuring Miguel #Beautiful 16 15
The Chainsmokers #SELFIE 11 16

Get artists with the highest number of hit songs (rank of songs < 21)

# Number of top-20 songs per artist, most prolific artist first.
highest_no_rank1 <- arrange(
  summarise(
    group_by(filter(peak_rank_for_song, highest_rank < 21), artist),
    n = n()
  ),
  desc(n)
)

head(highest_no_rank1)
artist n
Taylor Swift 44
Madonna 42
The Beatles 41
Elton John 35
Stevie Wonder 33
Drake 31

Preliminary OLS examination

# Single-regressor OLS of popularity on tempo, fitted separately on the 2008,
# 2019 and 2020 subsets.
ols08 <- lm(popularity ~ tempo, data = main_df_selected_08)
ols19 <- lm(popularity ~ tempo, data = main_df_selected_19)
ols20 <- lm(popularity ~ tempo, data = main_df_selected_20)

# Coefficient t-tests for the 2008 fit using heteroskedasticity-robust (HC1)
# standard errors.
coeftest(ols08, vcov. = vcovHC(ols08, type = "HC1"))
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 64.0583247  1.1998781 53.3874 < 2.2e-16 ***
## tempo       -0.0312584  0.0096692 -3.2328  0.001234 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same HC1-robust coefficient test for the 2019 fit.
coeftest(ols19, vcov. = vcovHC(ols19, type = "HC1"))
## 
## t test of coefficients:
## 
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) 75.5755674  0.6381582 118.4276  < 2e-16 ***
## tempo       -0.0096010  0.0051004  -1.8824  0.05984 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same HC1-robust coefficient test for the 2020 fit.
coeftest(ols20, vcov. = vcovHC(ols20, type = "HC1"))
## 
## t test of coefficients:
## 
##               Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept) 77.8633255  0.5589755 139.2965 < 2.2e-16 ***
## tempo       -0.0163223  0.0044818  -3.6419 0.0002733 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Scatterplot of popularity against tempo with the fitted OLS line overlaid.
#
# df    : data frame with `tempo` and `popularity` columns
# fit   : the lm() fit of popularity on tempo for the same data
# title : plot title
#
# The x axis is labelled in beats per minute: the tempo values in this data
# range from about 46 to 212, which cannot be beats per second.
plot_popularity_vs_tempo <- function(df, fit, title) {
  plot(
    x = df$tempo,
    y = df$popularity,
    xlab = "tempo (Beats per minute)",
    ylab = "Popularity",
    main = title,
    ylim = c(0, 100)
  )
  abline(fit, lwd = 1.5, col = "red")
}

# scatterplot and fitted line for year 2008
plot_popularity_vs_tempo(
  main_df_selected_08, ols08,
  "Popularity vs Tempo in year 2008 - financial crisis"
)

# scatterplot and fitted line for year 2019
plot_popularity_vs_tempo(
  main_df_selected_19, ols19,
  "Popularity vs Tempo in year 2019 - Before Covid"
)

# scatterplot and fitted line for year 2020 - Covid
plot_popularity_vs_tempo(
  main_df_selected_20, ols20,
  "Popularity vs Tempo in year 2020 - Covid"
)

From these three simple linear regression models for the years 2008, 2019 and 2020, it is “surprising” to see that popularity and tempo are negatively associated in all three years. The coefficient of tempo is significant at the 1% level in the crisis years (2008 and 2020), but in the normal year 2019 it is much smaller in magnitude and significant only at the 10% level. Without careful inspection and further analysis, one could have concluded that during a global crisis people tend to listen to slower music and that slower music is more popular.

# Residuals-vs-fitted and normal Q-Q diagnostics (plot.lm panels 1 and 2) for
# each of the three OLS fits, in the order 2008, 2019, 2020.
invisible(lapply(list(ols08, ols19, ols20), plot, which = c(1, 2)))

Not surprisingly, the strict exogeneity assumption is subject to serious challenge, since the residuals do not appear to have mean zero in any of the three cases and the residual plots show a slightly positive trend (a signal of systematic bias). This might be a consequence of omitted variables, since the simple linear regression model does not include any of the other available covariates. However, even multivariate linear regression models won’t be able to resolve this: given such panel data, each rank might have strong heterogeneity in its popularity, i.e. the “fixed effect” \(\alpha_i\), that correlates with all available covariates in the dataset.

We need to take advantage of the panel structure to handle the fixed effect.

Part C: Data Analysis

K-Means clustering

# Feature table for the full-attribute k-means run: the twelve clustering
# columns of main_df, rebuilt as a plain data.frame, followed by a summary of
# each column.
kmeandata <- as.data.frame(unclass(
  main_df[, c("length_sec", "popularity", "danceability", "acousticness",
              "energy", "instrumentalness", "liveness", "loudness_sqr",
              "speechiness", "tempo", "time_signature", "hit_song"),
          drop = FALSE]
))
summary(kmeandata)
##    length_sec       popularity    danceability     acousticness     
##  Min.   : 34.31   Min.   : 0.0   Min.   :0.0768   Min.   :0.000002  
##  1st Qu.:198.97   1st Qu.:57.0   1st Qu.:0.5450   1st Qu.:0.020500  
##  Median :221.77   Median :68.0   Median :0.6450   Median :0.084300  
##  Mean   :225.20   Mean   :65.1   Mean   :0.6409   Mean   :0.171466  
##  3rd Qu.:246.95   3rd Qu.:77.0   3rd Qu.:0.7440   3rd Qu.:0.245000  
##  Max.   :992.16   Max.   :98.0   Max.   :0.9810   Max.   :0.995000  
##  NA's   :20       NA's   :20     NA's   :20       NA's   :20        
##      energy        instrumentalness      liveness       loudness_sqr      
##  Min.   :0.00379   Min.   :0.000000   Min.   :0.0193   Min.   :   0.2144  
##  1st Qu.:0.56900   1st Qu.:0.000000   1st Qu.:0.0931   1st Qu.:  20.0346  
##  Median :0.69800   Median :0.000000   Median :0.1240   Median :  32.0243  
##  Mean   :0.67964   Mean   :0.011410   Mean   :0.1777   Mean   :  41.9521  
##  3rd Qu.:0.81000   3rd Qu.:0.000017   3rd Qu.:0.2240   3rd Qu.:  50.3248  
##  Max.   :0.99900   Max.   :0.985000   Max.   :0.9850   Max.   :1839.2948  
##  NA's   :20        NA's   :20         NA's   :20       NA's   :20         
##   speechiness         tempo        time_signature     hit_song     
##  Min.   :0.0224   Min.   : 46.17   Min.   :1.000   Min.   :0.0000  
##  1st Qu.:0.0355   1st Qu.: 97.07   1st Qu.:4.000   1st Qu.:0.0000  
##  Median :0.0524   Median :120.01   Median :4.000   Median :1.0000  
##  Mean   :0.1001   Mean   :121.27   Mean   :3.975   Mean   :0.5557  
##  3rd Qu.:0.1190   3rd Qu.:140.07   3rd Qu.:4.000   3rd Qu.:1.0000  
##  Max.   :0.9610   Max.   :211.89   Max.   :5.000   Max.   :1.0000  
##  NA's   :20       NA's   :20       NA's   :20
dim(kmeandata)
## [1] 114100     12
# Drop the rows that contain a missing value; kmeans() cannot be run on data
# with NAs. (A flattened as.numeric(unlist(kmeandata)) copy used to be built
# here and was overwritten immediately, so it has been removed.)
kmeanclean <- na.omit(kmeandata)

# Two-column inputs for the pairwise k-means runs below.
#
# Every pair follows the same recipe: pick two columns of the source table,
# rebuild the selection as a plain data.frame, and drop the rows where either
# column is missing.
#
# df   : source data frame
# cols : character vector with the names of the columns to keep
# Returns a data.frame holding only `cols`, without incomplete rows.
complete_columns <- function(df, cols) {
  picked <- df[, cols, drop = FALSE]
  na.omit(as.data.frame(unclass(picked)))
}

################################### rank vs popularity, main_df ##########
km_rank_popu <- complete_columns(main_df, c("rank", "popularity"))

################################### rank vs popularity, main_df_rankInfo ##
km_rank_popu_se <- complete_columns(main_df_rankInfo, c("rank", "popularity"))

############ mean rank vs one audio attribute, main_df_rankInfo
km_mrank_tempo_se <- complete_columns(
  main_df_rankInfo, c("mean_rank", "tempo")
)
km_mrank_energy_se <- complete_columns(
  main_df_rankInfo, c("mean_rank", "energy")
)
km_mrank_danceability_se <- complete_columns(
  main_df_rankInfo, c("mean_rank", "danceability")
)

############ popularity vs one audio attribute, main_df_rankInfo
km_popu_tempo_se <- complete_columns(
  main_df_rankInfo, c("popularity", "tempo")
)
km_popu_energy_se <- complete_columns(
  main_df_rankInfo, c("popularity", "energy")
)
km_popu_danceability_se <- complete_columns(
  main_df_rankInfo, c("popularity", "danceability")
)
km_popu_acousticness_se <- complete_columns(
  main_df_rankInfo, c("popularity", "acousticness")
)
km_popu_instrumentalness_se <- complete_columns(
  main_df_rankInfo, c("popularity", "instrumentalness")
)
km_popu_liveness_se <- complete_columns(
  main_df_rankInfo, c("popularity", "liveness")
)
km_popu_speechiness_se <- complete_columns(
  main_df_rankInfo, c("popularity", "speechiness")
)
km_popu_time_signature_se <- complete_columns(
  main_df_rankInfo, c("popularity", "time_signature")
)
km_popu_loudness_sqr_se <- complete_columns(
  main_df_rankInfo, c("popularity", "loudness_sqr")
)
# summary(kmeanclean)
# dim(kmeanclean)

#Data Coeifficient
#psych::pairs.panels(kmeanclean, lm=TRUE)
# Elbow plots: total within-cluster sum of squares for k = 1..10 clusters, for
# the full attribute table and for the two rank/popularity tables.

# Marker styling shared by the three plots: the k = 3 point is drawn larger,
# as a red triangle, to flag the number of clusters used further down.
max_k <- 10
cexs <- rep(1, max_k)
cexs[3] <- 2
pchs <- rep(1, max_k)
pchs[3] <- 2
cols <- rep("black", max_k)
cols[3] <- "red"

# Fit kmeans() for k = 1..length(col) on `data` and draw the elbow plot.
#
# data          : numeric data frame without missing values
# nstart        : number of random starts handed to kmeans()
# col, pch, cex : per-k marker colour, symbol and size
# Returns (invisibly) the vector of total within-cluster sums of squares.
plot_elbow <- function(data, nstart, col, pch, cex) {
  wss <- vapply(
    seq_along(col),
    function(k) kmeans(data, k, nstart = nstart)$tot.withinss,
    numeric(1)
  )
  plot(wss, type = "b",
       xlab = "number of clusters, k",
       ylab = "within cluster sum of squares distance",
       col = col, pch = pch, cex = cex)
  invisible(wss)
}

# kmeans() starts from randomly chosen centres; fix the seed so the curves are
# reproducible between runs.
set.seed(1)

#################################### all attributes from main_df ##########
wss <- plot_elbow(kmeanclean, nstart = 10, col = cols, pch = pchs, cex = cexs)

#################################### rank, popularity from main_df ########
wss <- plot_elbow(km_rank_popu, nstart = 20, col = cols, pch = pchs, cex = cexs)

#################################### rank, popularity from main_df_rankInfo
wss <- plot_elbow(km_rank_popu_se, nstart = 20,
                  col = cols, pch = pchs, cex = cexs)

# Three-cluster k-means on the full attribute table, its cluster plot, and the
# cluster sizes and centres. `centers` is spelled out instead of relying on
# partial matching of `center`.
# NOTE(review): the attributes are clustered on their raw scales, so the
# wide-ranging ones (length_sec, loudness_sqr) dominate the distance -- the
# centres below differ mostly in those two columns. Consider scale()-ing the
# data before kmeans(); confirm with the authors.
km <- kmeans(kmeanclean, centers = 3, nstart = 10)

fviz_cluster(km, data = kmeanclean,
             palette = c("#00AFBB", "#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot for full attributes")

km$size
## [1] 36248 73904  3928
km$centers
##   length_sec popularity danceability acousticness    energy instrumentalness
## 1   272.2515   62.99777    0.6320322    0.1696465 0.6645877      0.011901021
## 2   202.8012   66.89704    0.6465023    0.1566890 0.7040621      0.005969527
## 3   212.5546   50.72989    0.6163727    0.4662695 0.3590666      0.109241973
##    liveness loudness_sqr speechiness    tempo time_signature  hit_song
## 1 0.1779282     41.86203  0.10361027 119.9822       3.969322 0.5604447
## 2 0.1790932     34.13387  0.09622412 122.1968       3.979717 0.5555180
## 3 0.1503367    189.87927  0.14082576 115.5980       3.924389 0.5152749
############################# rank vs popularity, main_df (full dataset) #####
# Three-cluster k-means on (rank, popularity), its cluster plot, and the
# cluster sizes and centres. `centers` is spelled out instead of relying on
# partial matching of `center`.
km_r_p <- kmeans(km_rank_popu, centers = 3, nstart = 10)

fviz_cluster(km_r_p, data = km_rank_popu,
             palette = c("#00AFBB", "#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot")

km_r_p$size
## [1] 37906 38654 37520
km_r_p$centers
##       rank popularity
## 1 49.66852   63.02971
## 2 83.51798   59.51118
## 3 17.32751   72.95362
############################# rank vs popularity, main_df_rankInfo ###########
# Three-cluster k-means on (rank, popularity) and its cluster plot. `centers`
# is spelled out instead of relying on partial matching of `center`.
km_r_p_se <- kmeans(km_rank_popu_se, centers = 3, nstart = 10)

fviz_cluster(km_r_p_se, data = km_rank_popu_se,
             palette = c("#00AFBB", "#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot")

# Count of songs at each popularity value, stacked by assigned cluster. The
# fill used to be the continuous `rank` column, which does not define groups,
# so the bars were not split by cluster as the title promises; the fitted
# cluster label is used instead.
ggplot(data = cbind(km_rank_popu_se, cluster = factor(km_r_p_se$cluster)),
       aes(y = popularity)) +
  geom_bar(aes(fill = cluster)) +
  ggtitle("Count of Clusters by popularity") +
  theme(plot.title = element_text(hjust = 0.5))

km_r_p_se$size
## [1] 2829 4788 1579
km_r_p_se$centers
##       rank popularity
## 1 49.18416   70.16896
## 2 89.69236   65.83751
## 3 87.08360   32.50348
############################# mean rank vs tempo, main_df_rankInfo ###########
# Three-cluster k-means on (mean_rank, tempo), its cluster plot, and the
# cluster sizes and centres. `centers` is spelled out instead of relying on
# partial matching of `center`.
km_mr_t_se <- kmeans(km_mrank_tempo_se, centers = 3, nstart = 10)

fviz_cluster(km_mr_t_se, data = km_mrank_tempo_se,
             palette = c("#00AFBB", "#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - mean rank vs tempo")

km_mr_t_se$size
## [1] 3488 3339 2369
km_mr_t_se$centers
##   mean_rank     tempo
## 1  77.50836  99.62964
## 2  70.60294 152.97749
## 3  34.47516 112.61105
############################# mean rank vs energy, main_df_rankInfo ##########
# Three-cluster k-means on (mean_rank, energy), its cluster plot, and the
# cluster sizes and centres. `centers` is spelled out instead of relying on
# partial matching of `center`.
# NOTE(review): the columns are clustered unscaled, and the energy centres
# printed below are nearly identical across clusters, so the split looks
# driven by mean_rank alone -- consider scale()-ing first; confirm.
km_mr_e_se <- kmeans(km_mrank_energy_se, centers = 3, nstart = 10)

fviz_cluster(km_mr_e_se, data = km_mrank_energy_se,
             palette = c("#00AFBB", "#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - mean rank vs energy")

km_mr_e_se$size
## [1] 3469 3626 2101
km_mr_e_se$centers
##   mean_rank    energy
## 1  61.24892 0.6660514
## 2  86.33582 0.6730016
## 3  29.62298 0.6658422
############################# mean rank vs danceability, main_df_rankInfo ####
# Three-cluster k-means on (mean_rank, danceability), its cluster plot, and
# the cluster sizes and centres. `centers` is spelled out instead of relying
# on partial matching of `center`.
km_mr_d_se <- kmeans(km_mrank_danceability_se, centers = 3, nstart = 10)

fviz_cluster(km_mr_d_se, data = km_mrank_danceability_se,
             palette = c("#00AFBB", "#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - mean rank vs danceability")

km_mr_d_se$size
## [1] 3469 3626 2101
km_mr_d_se$centers
##   mean_rank danceability
## 1  61.24892    0.6288553
## 2  86.33582    0.6306811
## 3  29.62298    0.6554146
############################# popularity vs tempo, main_df_rankInfo ##########
# Three-cluster k-means on (popularity, tempo), its cluster plot, and the
# cluster sizes and centres. `centers` is spelled out instead of relying on
# partial matching of `center`.
km_p_t_se <- kmeans(km_popu_tempo_se, centers = 3, nstart = 10)

fviz_cluster(km_p_t_se, data = km_popu_tempo_se,
             palette = c("#00AFBB", "#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - popularity vs tempo")

km_p_t_se$size
## [1] 3571 1878 3747
km_p_t_se$centers
##   popularity     tempo
## 1   61.26407  92.36677
## 2   61.22737 165.75839
## 3   61.72992 129.15388
############################# popularity vs energy, main_df_rankInfo #########
# Three-cluster k-means on (popularity, energy), its cluster plot, and the
# cluster sizes and centres. `centers` is spelled out instead of relying on
# partial matching of `center`.
km_p_e_se <- kmeans(km_popu_energy_se, centers = 3, nstart = 10)

fviz_cluster(km_p_e_se, data = km_popu_energy_se,
             palette = c("#00AFBB", "#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - popularity vs energy")

km_p_e_se$size
## [1]  835 3751 4610
km_p_e_se$centers
##   popularity    energy
## 1   21.50539 0.6330150
## 2   54.60491 0.6873781
## 3   74.24751 0.6600538
####################### popularity vs danceability, main_df_rankInfo #########
# Three-cluster k-means on (popularity, danceability), its cluster plot, and
# the cluster sizes and centres. `centers` is spelled out instead of relying
# on partial matching of `center`.
km_p_d_se <- kmeans(km_popu_danceability_se, centers = 3, nstart = 10)

fviz_cluster(km_p_d_se, data = km_popu_danceability_se,
             palette = c("#00AFBB", "#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - popularity vs danceability")

km_p_d_se$size
## [1] 4825 3625  746
km_p_d_se$centers
##   popularity danceability
## 1   73.79088    0.6512960
## 2   53.62731    0.6226242
## 3   19.59920    0.5976662
####################### popularity vs acousticness, main_df_rankInfo #########
# Three-cluster k-means on (popularity, acousticness), its cluster plot, and
# the cluster sizes and centres. `centers` is spelled out instead of relying
# on partial matching of `center`.
km_p_a_se <- kmeans(km_popu_acousticness_se, centers = 3, nstart = 10)

fviz_cluster(km_p_a_se, data = km_popu_acousticness_se,
             palette = c("#00AFBB", "#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - popularity vs acousticness")

km_p_a_se$size
## [1]  792 4610 3794
km_p_a_se$centers
##   popularity acousticness
## 1   20.60985    0.2432862
## 2   74.24751    0.1890779
## 3   54.41671    0.1734073
#######################popularity vs instrumentalness main_df_selected###############
# Partition the (popularity, instrumentalness) observations into three k-means
# clusters. `centers` is written out in full instead of relying on partial
# matching of `center`. nstart = 10 tries ten random starts; no seed is set in
# this section, so the cluster numbering may differ between runs.
km_p_i_se = kmeans(km_popu_instrumentalness_se, centers = 3, nstart = 10)

# Scatter plot of the observations coloured by their assigned cluster.
fviz_cluster(km_p_i_se, data = km_popu_instrumentalness_se,
             palette = c("#00AFBB","#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - popularity vs instrumentalness")

# Number of observations per cluster.
km_p_i_se$size
## [1]  746 3625 4825
# Cluster centroids.
km_p_i_se$centers
##   popularity instrumentalness
## 1   19.59920      0.061512060
## 2   53.62731      0.009727447
## 3   73.79088      0.009557970
#######################popularity vs liveness main_df_selected###############
# Partition the (popularity, liveness) observations into three k-means
# clusters. `centers` is written out in full instead of relying on partial
# matching of `center`. nstart = 10 tries ten random starts; no seed is set in
# this section, so the cluster numbering may differ between runs.
km_p_l_se = kmeans(km_popu_liveness_se, centers = 3, nstart = 10)

# Scatter plot of the observations coloured by their assigned cluster.
fviz_cluster(km_p_l_se, data = km_popu_liveness_se,
             palette = c("#00AFBB","#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - popularity vs liveness")

# Number of observations per cluster.
km_p_l_se$size
## [1] 4825 3625  746
# Cluster centroids.
km_p_l_se$centers
##   popularity  liveness
## 1   73.79088 0.1817304
## 2   53.62731 0.1856043
## 3   19.59920 0.1800791
#######################popularity vs speechiness main_df_selected###############
# Partition the (popularity, speechiness) observations into three k-means
# clusters. `centers` is written out in full instead of relying on partial
# matching of `center`. nstart = 10 tries ten random starts; no seed is set in
# this section, so the cluster numbering may differ between runs.
km_p_s_se = kmeans(km_popu_speechiness_se, centers = 3, nstart = 10)

# Scatter plot of the observations coloured by their assigned cluster.
fviz_cluster(km_p_s_se, data = km_popu_speechiness_se,
             palette = c("#00AFBB","#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - popularity vs speechiness")

# Number of observations per cluster.
km_p_s_se$size
## [1]  792 3579 4825
# Cluster centroids.
km_p_s_se$centers
##   popularity speechiness
## 1   20.60985  0.09440619
## 2   53.84102  0.11239626
## 3   73.79088  0.11558524
#######################popularity vs time_signature main_df_selected###############
# Partition the (popularity, time_signature) observations into three k-means
# clusters. `centers` is written out in full instead of relying on partial
# matching of `center`. nstart = 10 tries ten random starts; no seed is set in
# this section, so the cluster numbering may differ between runs.
km_p_ts_se = kmeans(km_popu_time_signature_se, centers = 3, nstart = 10)

# Plot the time_signature fit (km_p_ts_se) against the data it was fitted on,
# so the colours reflect this section's clustering and not the speechiness one.
fviz_cluster(km_p_ts_se, data = km_popu_time_signature_se,
             palette = c("#00AFBB","#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - popularity vs time_signature")

# Number of observations per cluster.
km_p_ts_se$size
## [1]  746 4825 3625
# Cluster centroids.
km_p_ts_se$centers
##   popularity time_signature
## 1   19.59920       3.912869
## 2   73.79088       3.976166
## 3   53.62731       3.957793
#######################popularity vs loudness_sqr main_df_selected###############
# Partition the (popularity, loudness_sqr) observations into three k-means
# clusters. `centers` is written out in full instead of relying on partial
# matching of `center`. nstart = 10 tries ten random starts; no seed is set in
# this section, so the cluster numbering may differ between runs.
km_p_lsqr_se = kmeans(km_popu_loudness_sqr_se, centers = 3, nstart = 10)

# Plot the loudness_sqr fit (km_p_lsqr_se) against the data it was fitted on,
# so the colours reflect this section's clustering and not the speechiness one.
fviz_cluster(km_p_lsqr_se, data = km_popu_loudness_sqr_se,
             palette = c("#00AFBB","#2E9FDF", "#FC4E07"),
             ggtheme = theme_minimal(),
             main = "clusters Plot - popularity vs loudness_sqr")

# Number of observations per cluster.
km_p_lsqr_se$size
## [1]  942   22 8232
# Cluster centroids.
km_p_lsqr_se$centers
##   popularity loudness_sqr
## 1   57.89915    131.86876
## 2   36.36364    758.76668
## 3   61.91934     33.98379

Plots and model selection

###################### Residual vs fitted plot for best rank ##################
# OLS regression of best_rank on the ten listed predictors, using
# main_df_rankInfo.
fit_bestr = lm(
  best_rank ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness +
    tempo + time_signature,
  data = main_df_rankInfo
)
# Store the model summary, then print it.
est = summary(fit_bestr)
print(est)
## 
## Call:
## lm(formula = best_rank ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature, data = main_df_rankInfo)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -60.177 -26.005   0.587  25.390  61.387 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       72.869146   5.232418  13.926  < 2e-16 ***
## length_sec        -0.041967   0.006091  -6.890 5.93e-12 ***
## danceability     -15.626827   2.249142  -6.948 3.96e-12 ***
## acousticness       4.089653   1.712396   2.388  0.01695 *  
## energy             5.716021   2.451599   2.332  0.01975 *  
## instrumentalness   5.947487   3.750711   1.586  0.11284    
## liveness          -2.502010   2.255505  -1.109  0.26733    
## loudness_sqr       0.014054   0.007638   1.840  0.06580 .  
## speechiness       13.536647   2.849669   4.750 2.06e-06 ***
## tempo              0.014695   0.010686   1.375  0.16911    
## time_signature    -3.241455   1.061180  -3.055  0.00226 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.52 on 9185 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.0165, Adjusted R-squared:  0.01543 
## F-statistic: 15.41 on 10 and 9185 DF,  p-value: < 2.2e-16
# Diagnostic plots for fit_bestr: residuals vs fitted, then normal Q-Q.
plot(fit_bestr, which = 1)

plot(fit_bestr, which = 2)

###################### Residual vs fitted plot for mean rank ##################
# OLS regression of mean_rank on the ten listed predictors, using
# main_df_rankInfo.
fit_meanr = lm(
  mean_rank ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness +
    tempo + time_signature,
  data = main_df_rankInfo
)
# Store the model summary, then print it.
est1 = summary(fit_meanr)
print(est1)
## 
## Call:
## lm(formula = mean_rank ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature, data = main_df_rankInfo)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.933 -16.013   3.184  18.531  43.530 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       80.341679   4.078897  19.697  < 2e-16 ***
## length_sec        -0.031301   0.004748  -6.593 4.56e-11 ***
## danceability     -11.961888   1.753303  -6.822 9.51e-12 ***
## acousticness       2.296403   1.334887   1.720 0.085412 .  
## energy             6.458975   1.911128   3.380 0.000729 ***
## instrumentalness   4.102092   2.923842   1.403 0.160657    
## liveness          -1.736878   1.758263  -0.988 0.323258    
## loudness_sqr       0.008927   0.005954   1.499 0.133837    
## speechiness        8.542687   2.221440   3.846 0.000121 ***
## tempo              0.010851   0.008330   1.303 0.192742    
## time_signature    -2.286121   0.827236  -2.764 0.005729 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.01 on 9185 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.01439,    Adjusted R-squared:  0.01332 
## F-statistic: 13.41 on 10 and 9185 DF,  p-value: < 2.2e-16
# Diagnostic plots for fit_meanr: residuals vs fitted, then normal Q-Q.
plot(fit_meanr, which = 1)

plot(fit_meanr, which = 2)

################# main_df_rankInfo -> balanced df with unique id #############
# OLS regression of popularity on the ten listed predictors, using
# main_df_rankInfo.
fit_popu_se = lm(
  popularity ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness +
    tempo + time_signature,
  data = main_df_rankInfo
)
# Store the model summary, then print it.
est2 = summary(fit_popu_se)
print(est2)
## 
## Call:
## lm(formula = popularity ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature, data = main_df_rankInfo)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -66.169  -8.161   2.728  11.482  51.198 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       55.166141   3.026825  18.226  < 2e-16 ***
## length_sec        -0.013655   0.003523  -3.876 0.000107 ***
## danceability      10.401822   1.301073   7.995 1.45e-15 ***
## acousticness      -1.424120   0.990579  -1.438 0.150563    
## energy            -6.318448   1.418190  -4.455 8.48e-06 ***
## instrumentalness -22.415503   2.169694 -10.331  < 2e-16 ***
## liveness          -0.527758   1.304754  -0.404 0.685863    
## loudness_sqr      -0.024459   0.004418  -5.536 3.19e-08 ***
## speechiness        0.502423   1.648463   0.305 0.760538    
## tempo              0.008563   0.006182   1.385 0.166001    
## time_signature     1.920503   0.613867   3.129 0.001762 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.08 on 9185 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.04039,    Adjusted R-squared:  0.03935 
## F-statistic: 38.66 on 10 and 9185 DF,  p-value: < 2.2e-16
# Diagnostic plots for fit_popu_se: residuals vs fitted, then normal Q-Q.
plot(fit_popu_se, which = 1)

plot(fit_popu_se, which = 2)

########### Residual vs fitted plot for popularity, full dataframe ###########
# OLS regression of popularity on the ten listed predictors, using
# main_df_selected.
fit_popu_full = lm(
  popularity ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness +
    tempo + time_signature,
  data = main_df_selected
)
# Store the model summary, then print it.
# NOTE(review): est3 is reassigned by the hit-song models further down.
est3 = summary(fit_popu_full)
print(est3)
## 
## Call:
## lm(formula = popularity ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature, data = main_df_selected)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -73.035  -7.414   2.728  11.434 136.428 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       72.234234   0.959912  75.251  < 2e-16 ***
## length_sec        -0.029050   0.001082 -26.858  < 2e-16 ***
## danceability       7.354951   0.378333  19.440  < 2e-16 ***
## acousticness      -1.751226   0.287670  -6.088 1.15e-09 ***
## energy           -12.063062   0.424370 -28.426  < 2e-16 ***
## instrumentalness -29.702758   0.662505 -44.834  < 2e-16 ***
## liveness          -1.360788   0.370990  -3.668 0.000245 ***
## loudness_sqr      -0.075493   0.001668 -45.265  < 2e-16 ***
## speechiness       -2.468287   0.502704  -4.910 9.12e-07 ***
## tempo              0.003465   0.001767   1.961 0.049851 *  
## time_signature     1.703218   0.198628   8.575  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.73 on 114069 degrees of freedom
##   (20 observations deleted due to missingness)
## Multiple R-squared:  0.0674, Adjusted R-squared:  0.06732 
## F-statistic: 824.4 on 10 and 114069 DF,  p-value: < 2.2e-16
# Diagnostic plots for fit_popu_full: residuals vs fitted, then normal Q-Q.
plot(fit_popu_full, which = 1)

plot(fit_popu_full, which = 2)

###### Residual vs fitted plot for popularity, hit songs (main_df_hit_song_rank) ######
# OLS regression of popularity on the ten listed predictors plus hit_song,
# using main_df_hit_song_rank.
# NOTE(review): the recorded output reports hit_song as "not defined because
# of singularities" -- presumably it is constant in this data frame; confirm
# and consider dropping the term.
fit_popu_hit = lm(
  popularity ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness +
    tempo + time_signature + hit_song,
  data = main_df_hit_song_rank
)
## glm
# Disabled alternative, kept for reference:
# fit_popu_hit = glm(hit_song ~ length_sec + danceability + acousticness + energy + instrumentalness + liveness+ loudness_sqr+ speechiness+ tempo + time_signature, family = binomial, data = main_df_hit_song_rank) 

# Store the model summary (this overwrites the est3 assigned from
# fit_popu_full), then print it.
est3 = summary(fit_popu_hit)
print(est3)
## 
## Call:
## lm(formula = popularity ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature + hit_song, data = main_df_hit_song_rank)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -68.012  -7.668   2.404  10.939  53.011 
## 
## Coefficients: (1 not defined because of singularities)
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       69.996144   4.600240  15.216  < 2e-16 ***
## length_sec        -0.029878   0.004972  -6.009 2.00e-09 ***
## danceability       6.320969   1.798746   3.514 0.000445 ***
## acousticness      -3.018748   1.373178  -2.198 0.027969 *  
## energy           -13.575060   1.994524  -6.806 1.12e-11 ***
## instrumentalness -30.418741   3.273268  -9.293  < 2e-16 ***
## liveness          -0.869077   1.727677  -0.503 0.614964    
## loudness_sqr      -0.087609   0.007862 -11.144  < 2e-16 ***
## speechiness       -1.464652   2.380219  -0.615 0.538356    
## tempo              0.009406   0.008291   1.135 0.256607    
## time_signature     2.319107   0.965614   2.402 0.016356 *  
## hit_song                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.49 on 4958 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.07716,    Adjusted R-squared:  0.0753 
## F-statistic: 41.45 on 10 and 4958 DF,  p-value: < 2.2e-16
# Diagnostic plots for fit_popu_hit: residuals vs fitted, then normal Q-Q.
plot(fit_popu_hit, which = 1)

plot(fit_popu_hit, which = 2)

###### Residual vs fitted plot for best rank, hit songs (main_df_hit_song_rank) ######
# OLS regression of best_rank on the ten listed predictors plus hit_song,
# using main_df_hit_song_rank.
# NOTE(review): the recorded output reports hit_song as "not defined because
# of singularities" -- presumably it is constant in this data frame; confirm
# and consider dropping the term.
fit_bestr_hit = lm(
  best_rank ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness +
    tempo + time_signature + hit_song,
  data = main_df_hit_song_rank
)
# Store the model summary (est3 is reused, replacing its previous value),
# then print it.
est3 = summary(fit_bestr_hit)
print(est3)
## 
## Call:
## lm(formula = best_rank ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature + hit_song, data = main_df_hit_song_rank)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.425 -20.735  -3.114  17.766  65.454 
## 
## Coefficients: (1 not defined because of singularities)
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       42.008205   6.539866   6.423 1.46e-10 ***
## length_sec        -0.005363   0.007069  -0.759  0.44805    
## danceability     -17.019107   2.557162  -6.655 3.13e-11 ***
## acousticness       3.737787   1.952159   1.915  0.05559 .  
## energy            15.009944   2.835487   5.294 1.25e-07 ***
## instrumentalness  -5.969435   4.653395  -1.283  0.19962    
## liveness          -3.508634   2.456127  -1.429  0.15320    
## loudness_sqr       0.036025   0.011177   3.223  0.00128 ** 
## speechiness      -11.112292   3.383805  -3.284  0.00103 ** 
## tempo              0.003027   0.011786   0.257  0.79729    
## time_signature    -2.006941   1.372751  -1.462  0.14381    
## hit_song                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.44 on 4958 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.02537,    Adjusted R-squared:  0.02341 
## F-statistic: 12.91 on 10 and 4958 DF,  p-value: < 2.2e-16
# Diagnostic plots for fit_bestr_hit: residuals vs fitted, then normal Q-Q.
plot(fit_bestr_hit, which = 1)

plot(fit_bestr_hit, which = 2)

###### Residual vs fitted plot for mean rank, hit songs (main_df_hit_song_rank) ######
# OLS regression of mean_rank on the ten listed predictors plus hit_song,
# using main_df_hit_song_rank.
# NOTE(review): the recorded output reports hit_song as "not defined because
# of singularities" -- presumably it is constant in this data frame; confirm
# and consider dropping the term.
fit_meanr_hit = lm(
  mean_rank ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness +
    tempo + time_signature + hit_song,
  data = main_df_hit_song_rank
)
# Store the model summary (est3 is reused, replacing its previous value),
# then print it.
est3 = summary(fit_meanr_hit)
print(est3)
## 
## Call:
## lm(formula = mean_rank ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature + hit_song, data = main_df_hit_song_rank)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -54.411 -18.759   1.869  17.104  50.617 
## 
## Coefficients: (1 not defined because of singularities)
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       67.729232   6.062948  11.171  < 2e-16 ***
## length_sec        -0.014974   0.006553  -2.285   0.0224 *  
## danceability     -18.015832   2.370681  -7.599 3.53e-14 ***
## acousticness       3.321095   1.809798   1.835   0.0666 .  
## energy            11.965756   2.628710   4.552 5.44e-06 ***
## instrumentalness  -3.269344   4.314047  -0.758   0.4486    
## liveness          -2.551851   2.277014  -1.121   0.2625    
## loudness_sqr       0.023972   0.010362   2.314   0.0207 *  
## speechiness       -6.504176   3.137042  -2.073   0.0382 *  
## tempo              0.005648   0.010927   0.517   0.6052    
## time_signature    -2.198693   1.272644  -1.728   0.0841 .  
## hit_song                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.73 on 4958 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.02604,    Adjusted R-squared:  0.02408 
## F-statistic: 13.26 on 10 and 4958 DF,  p-value: < 2.2e-16
# Diagnostic plots for fit_meanr_hit: residuals vs fitted, then normal Q-Q.
plot(fit_meanr_hit, which = 1)

plot(fit_meanr_hit, which = 2)

Breusch-Pagan test and robust standard errors

# Breusch-Pagan test for heteroskedasticity in fit_popu_se. The residual plot
# is fan-shaped and the test's p-value is below 2.2e-16.
bptest(fit_popu_se)
## 
##  studentized Breusch-Pagan test
## 
## data:  fit_popu_se
## BP = 285.41, df = 10, p-value < 2.2e-16
# Re-test the coefficients with White-Huber (heteroskedasticity-robust)
# standard errors, using `sandwich` as the covariance estimator. The argument
# of coeftest() is named `vcov.`; it is written in full rather than relying on
# partial matching of `vcov`.
coeftest(fit_popu_se, vcov. = sandwich)
## 
## t test of coefficients:
## 
##                     Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)       55.1661413   3.2172645 17.1469 < 2.2e-16 ***
## length_sec        -0.0136548   0.0039180 -3.4851 0.0004943 ***
## danceability      10.4018216   1.3807554  7.5334 5.410e-14 ***
## acousticness      -1.4241204   1.0735162 -1.3266 0.1846759    
## energy            -6.3184483   1.6745818 -3.7731 0.0001622 ***
## instrumentalness -22.4155029   3.6045229 -6.2187 5.231e-10 ***
## liveness          -0.5277583   1.3271377 -0.3977 0.6908852    
## loudness_sqr      -0.0244589   0.0085051 -2.8758 0.0040392 ** 
## speechiness        0.5024233   1.8006259  0.2790 0.7802303    
## tempo              0.0085634   0.0062052  1.3800 0.1676091    
## time_signature     1.9205034   0.6209093  3.0931 0.0019871 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same Breusch-Pagan test and robust re-estimation for the remaining models,
# starting with fit_bestr.
bptest(fit_bestr)
## 
##  studentized Breusch-Pagan test
## 
## data:  fit_bestr
## BP = 86.743, df = 10, p-value = 2.363e-14
# Coefficient tests with `sandwich` as the covariance estimator; `vcov.` is
# the full argument name of coeftest().
coeftest(fit_bestr, vcov. = sandwich)
## 
## t test of coefficients:
## 
##                     Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)       72.8691457   4.8936805 14.8905 < 2.2e-16 ***
## length_sec        -0.0419668   0.0062875 -6.6746 2.622e-11 ***
## danceability     -15.6268267   2.2784436 -6.8586 7.405e-12 ***
## acousticness       4.0896533   1.7147866  2.3849  0.017102 *  
## energy             5.7160212   2.3860293  2.3956  0.016612 *  
## instrumentalness   5.9474875   3.7984292  1.5658  0.117436    
## liveness          -2.5020105   2.2445111 -1.1147  0.264998    
## loudness_sqr       0.0140541   0.0062628  2.2441  0.024852 *  
## speechiness       13.5366469   2.9395475  4.6050 4.180e-06 ***
## tempo              0.0146953   0.0107175  1.3712  0.170360    
## time_signature    -3.2414554   0.9571832 -3.3865  0.000711 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Breusch-Pagan test for heteroskedasticity in fit_meanr.
bptest(fit_meanr)
## 
##  studentized Breusch-Pagan test
## 
## data:  fit_meanr
## BP = 120.11, df = 10, p-value < 2.2e-16
# Coefficient tests with `sandwich` as the covariance estimator; `vcov.` is
# the full argument name of coeftest().
coeftest(fit_meanr, vcov. = sandwich)
## 
## t test of coefficients:
## 
##                     Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)       80.3416794   3.6853244 21.8004 < 2.2e-16 ***
## length_sec        -0.0313009   0.0048237 -6.4889 9.091e-11 ***
## danceability     -11.9618879   1.7709055 -6.7547 1.518e-11 ***
## acousticness       2.2964026   1.3233777  1.7353 0.0827286 .  
## energy             6.4589753   1.8573467  3.4775 0.0005084 ***
## instrumentalness   4.1020922   2.9063172  1.4114 0.1581488    
## liveness          -1.7368779   1.7483057 -0.9935 0.3205103    
## loudness_sqr       0.0089270   0.0047459  1.8810 0.0600062 .  
## speechiness        8.5426874   2.2093733  3.8666 0.0001111 ***
## tempo              0.0108511   0.0082342  1.3178 0.1876015    
## time_signature    -2.2861208   0.7097092 -3.2212 0.0012810 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Breusch-Pagan test for heteroskedasticity in fit_popu_full.
bptest(fit_popu_full)
## 
##  studentized Breusch-Pagan test
## 
## data:  fit_popu_full
## BP = 4440.1, df = 10, p-value < 2.2e-16
# Coefficient tests with `sandwich` as the covariance estimator; `vcov.` is
# the full argument name of coeftest().
coeftest(fit_popu_full, vcov. = sandwich)
## 
## t test of coefficients:
## 
##                     Estimate  Std. Error  t value  Pr(>|t|)    
## (Intercept)       72.2342340   1.0212968  70.7280 < 2.2e-16 ***
## length_sec        -0.0290502   0.0012536 -23.1727 < 2.2e-16 ***
## danceability       7.3549512   0.4029308  18.2536 < 2.2e-16 ***
## acousticness      -1.7512260   0.3244854  -5.3969 6.792e-08 ***
## energy           -12.0630617   0.5784972 -20.8524 < 2.2e-16 ***
## instrumentalness -29.7027580   1.1987205 -24.7787 < 2.2e-16 ***
## liveness          -1.3607881   0.3898577  -3.4905 0.0004823 ***
## loudness_sqr      -0.0754932   0.0037555 -20.1018 < 2.2e-16 ***
## speechiness       -2.4682870   0.6142439  -4.0184 5.863e-05 ***
## tempo              0.0034650   0.0018106   1.9137 0.0556608 .  
## time_signature     1.7032182   0.1969114   8.6497 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Backward stepwise selection

# #Backward stepwise for best rank
# step(fit_bestr, direction = 'backward')
# #Backward stepwise for mean rank
# step(fit_meanr, direction = 'backward')
# #Backward stepwise for popularity
# step(fit_popu_full, direction = 'backward')

# Backward stepwise selection for the popularity model fitted on
# main_df_rankInfo (fit_popu_se); the trace below reports AIC at every step.
step(fit_popu_se, direction = "backward")
## Start:  AIC=52201.44
## popularity ~ length_sec + danceability + acousticness + energy + 
##     instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature
## 
##                    Df Sum of Sq     RSS   AIC
## - speechiness       1      27.1 2678264 52200
## - liveness          1      47.7 2678285 52200
## - tempo             1     559.6 2678797 52201
## <none>                          2678237 52201
## - acousticness      1     602.7 2678840 52202
## - time_signature    1    2854.0 2681091 52209
## - length_sec        1    4379.8 2682617 52214
## - energy            1    5787.9 2684025 52219
## - loudness_sqr      1    8935.2 2687172 52230
## - danceability      1   18637.4 2696874 52263
## - instrumentalness  1   31122.2 2709359 52306
## 
## Step:  AIC=52199.53
## popularity ~ length_sec + danceability + acousticness + energy + 
##     instrumentalness + liveness + loudness_sqr + tempo + time_signature
## 
##                    Df Sum of Sq     RSS   AIC
## - liveness          1      39.2 2678303 52198
## <none>                          2678264 52200
## - tempo             1     590.1 2678854 52200
## - acousticness      1     606.8 2678871 52200
## - time_signature    1    2869.6 2681134 52207
## - length_sec        1    4386.2 2682650 52213
## - energy            1    5801.0 2684065 52217
## - loudness_sqr      1    8911.8 2687176 52228
## - danceability      1   20738.2 2699002 52268
## - instrumentalness  1   31386.7 2709651 52305
## 
## Step:  AIC=52197.67
## popularity ~ length_sec + danceability + acousticness + energy + 
##     instrumentalness + loudness_sqr + tempo + time_signature
## 
##                    Df Sum of Sq     RSS   AIC
## <none>                          2678303 52198
## - tempo             1     590.8 2678894 52198
## - acousticness      1     604.5 2678908 52198
## - time_signature    1    2876.2 2681179 52206
## - length_sec        1    4392.1 2682695 52211
## - energy            1    6004.6 2684308 52216
## - loudness_sqr      1    8948.5 2687252 52226
## - danceability      1   20998.7 2699302 52267
## - instrumentalness  1   31358.8 2709662 52303
## 
## Call:
## lm(formula = popularity ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + loudness_sqr + tempo + time_signature, 
##     data = main_df_rankInfo)
## 
## Coefficients:
##      (Intercept)        length_sec      danceability      acousticness  
##        55.029876         -0.013673         10.550022         -1.426060  
##           energy  instrumentalness      loudness_sqr             tempo  
##        -6.387706        -22.446890         -0.024399          0.008755  
##   time_signature  
##         1.927292
# Backward stepwise selection for the popularity model fitted on
# main_df_selected (fit_popu_full).
step(fit_popu_full, direction = "backward")
## Start:  AIC=642804.4
## popularity ~ length_sec + danceability + acousticness + energy + 
##     instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature
## 
##                    Df Sum of Sq      RSS    AIC
## <none>                          31932776 642804
## - tempo             1      1077 31933853 642806
## - liveness          1      3766 31936542 642816
## - speechiness       1      6749 31939525 642827
## - acousticness      1     10374 31943151 642839
## - time_signature    1     20584 31953360 642876
## - danceability      1    105799 32038575 643180
## - length_sec        1    201933 32134709 643522
## - energy            1    226201 32158977 643608
## - instrumentalness  1    562710 32495486 644795
## - loudness_sqr      1    573570 32506346 644833
## 
## Call:
## lm(formula = popularity ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature, data = main_df_selected)
## 
## Coefficients:
##      (Intercept)        length_sec      danceability      acousticness  
##        72.234234         -0.029050          7.354951         -1.751226  
##           energy  instrumentalness          liveness      loudness_sqr  
##       -12.063062        -29.702758         -1.360788         -0.075493  
##      speechiness             tempo    time_signature  
##        -2.468287          0.003465          1.703218

Result from backward step wise:

fit_popu_se

formula = popularity ~ length_sec + danceability + acousticness + energy + instrumentalness + loudness_sqr + tempo + time_signature, data = main_df_rankInfo

fit_popu_full

formula = popularity ~ length_sec + danceability + acousticness + energy + instrumentalness + liveness + loudness_sqr + speechiness + tempo + time_signature, data = main_df_selected

###############################################################################################APPENDIX - Panel Analysis - For Reference ONLY#########################

1.1 Fixed effects model

# Fixed-effects (within) estimation

##### main_df_rankInfo, optimised attributes, unique songs #####

# Two-way within model of popularity, indexed by "rank" and "date".
# NOTE(review): this predictor set omits acousticness, which the backward
# stepwise output for fit_popu_se retained -- confirm the omission is intended.
fe = plm(
  popularity ~ length_sec + danceability + energy + instrumentalness +
    loudness_sqr + tempo + time_signature,
  data = main_df_rankInfo,
  index = c("rank", "date"),
  model = "within",
  effect = "twoways"
)

##### main_df_rankInfo, full attributes, unique songs #####

# Two-way within model of popularity on all ten predictors, indexed by "rank"
# and "date".
fe_full_attri = plm(
  popularity ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness +
    tempo + time_signature,
  data = main_df_rankInfo,
  index = c("rank", "date"),
  model = "within",
  effect = "twoways"
)


###### main_df_selected, full list of songs ######

# Two-way within model of popularity on all ten predictors, indexed by "rank"
# and "date", fitted on main_df_selected.
fe_full_list = plm(
  popularity ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness +
    tempo + time_signature,
  data = main_df_selected,
  index = c("rank", "date"),
  model = "within",
  effect = "twoways"
)


 # Coefficient table and fit statistics for fe.
 print(summary(fe))
## Twoways effects Within Model
## 
## Call:
## plm(formula = popularity ~ length_sec + danceability + energy + 
##     instrumentalness + loudness_sqr + tempo + time_signature, 
##     data = main_df_rankInfo, effect = "twoways", model = "within", 
##     index = c("rank", "date"))
## 
## Unbalanced Panel: n = 100, T = 1-528, N = 9196
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -68.4132  -6.6362   1.0305   8.4240  54.6898 
## 
## Coefficients:
##                     Estimate  Std. Error t-value  Pr(>|t|)    
## length_sec         0.0153047   0.0034727  4.4072 1.061e-05 ***
## danceability       3.9085208   1.1831626  3.3035 0.0009592 ***
## energy             3.7931810   1.2188156  3.1122 0.0018637 ** 
## instrumentalness -20.5828000   2.0621303 -9.9813 < 2.2e-16 ***
## loudness_sqr      -0.0120165   0.0041756 -2.8778 0.0040150 ** 
## tempo             -0.0064336   0.0058259 -1.1043 0.2694942    
## time_signature     0.8600519   0.5798146  1.4833 0.1380284    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1847700
## Residual Sum of Squares: 1790000
## R-Squared:      0.031248
## Adj. R-Squared: -0.1206
## F-statistic: 36.6285 on 7 and 7949 DF, p-value: < 2.22e-16
 # Coefficient table and fit statistics for fe_full_attri.
 print(summary(fe_full_attri))
## Twoways effects Within Model
## 
## Call:
## plm(formula = popularity ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature, data = main_df_rankInfo, effect = "twoways", 
##     model = "within", index = c("rank", "date"))
## 
## Unbalanced Panel: n = 100, T = 1-528, N = 9196
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -68.71762  -6.57234   0.96278   8.46597  54.29075 
## 
## Coefficients:
##                     Estimate  Std. Error  t-value  Pr(>|t|)    
## length_sec         0.0159194   0.0034782   4.5769 4.792e-06 ***
## danceability       4.9366297   1.2492918   3.9515 7.832e-05 ***
## acousticness      -0.6620354   0.9434598  -0.7017  0.482880    
## energy             3.6944469   1.3715529   2.6936  0.007083 ** 
## instrumentalness -21.1919410   2.0647864 -10.2635 < 2.2e-16 ***
## liveness          -0.8209202   1.2320516  -0.6663  0.505237    
## loudness_sqr      -0.0101767   0.0042037  -2.4209  0.015506 *  
## speechiness       -6.6685201   1.6015180  -4.1639 3.162e-05 ***
## tempo             -0.0043808   0.0058527  -0.7485  0.454178    
## time_signature     0.8878747   0.5797613   1.5314  0.125698    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1847700
## Residual Sum of Squares: 1785600
## R-Squared:      0.033581
## Adj. R-Squared: -0.11833
## F-statistic: 27.6103 on 10 and 7946 DF, p-value: < 2.22e-16
 # Coefficient table and fit statistics for fe_full_list.
 print(summary(fe_full_list))
## Twoways effects Within Model
## 
## Call:
## plm(formula = popularity ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature, data = main_df_selected, effect = "twoways", 
##     model = "within", index = c("rank", "date"))
## 
## Unbalanced Panel: n = 100, T = 1138-1141, N = 114080
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -80.2919  -5.9310   1.6776   8.7478 105.2196 
## 
## Coefficients:
##                     Estimate  Std. Error  t-value  Pr(>|t|)    
## length_sec         0.0086538   0.0010175   8.5052 < 2.2e-16 ***
## danceability       1.1799206   0.3429004   3.4410 0.0005798 ***
## acousticness      -1.4312663   0.2579138  -5.5494 2.873e-08 ***
## energy            -1.5132730   0.3866188  -3.9141 9.079e-05 ***
## instrumentalness -29.5212380   0.5936833 -49.7256 < 2.2e-16 ***
## liveness          -1.2200173   0.3326573  -3.6675 0.0002451 ***
## loudness_sqr      -0.0502589   0.0015033 -33.4314 < 2.2e-16 ***
## speechiness       -5.6202050   0.4532216 -12.4006 < 2.2e-16 ***
## tempo             -0.0086922   0.0015848  -5.4847 4.151e-08 ***
## time_signature     1.1575203   0.1777861   6.5107 7.509e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    26598000
## Residual Sum of Squares: 25036000
## R-Squared:      0.058748
## Adj. R-Squared: 0.048328
## F-statistic: 704.222 on 10 and 112830 DF, p-value: < 2.22e-16
# An important feature of panel data is that the errors are often correlated within an entity, for example through serial correlation (effects that persist across an entity's observations over time). When errors are correlated, the conventional standard-error estimates are incorrect; in many cases they are too small, which overstates the precision of the results. Fortunately, we can correct the standard errors for correlation across time periods within entities by using clustered standard errors.

# Wooldridge's serial-correlation test on `fe`, then the same coefficients
# re-tested with an HC1 covariance matrix obtained from vcovHC().
pwartest(fe)
## 
##  Wooldridge's test for serial correlation in FE panels
## 
## data:  fe
## F = 0.27842, df1 = 1, df2 = 9094, p-value = 0.5977
## alternative hypothesis: serial correlation
fe.robust <- coeftest(fe, vcov. = function(x) vcovHC(x, type = "HC1"))
print(fe.robust)
## 
## t test of coefficients:
## 
##                     Estimate  Std. Error t value  Pr(>|t|)    
## length_sec         0.0153047   0.0038451  3.9804 6.942e-05 ***
## danceability       3.9085208   1.2928563  3.0232  0.002509 ** 
## energy             3.7931810   1.2130933  3.1269  0.001773 ** 
## instrumentalness -20.5828000   2.8653030 -7.1835 7.411e-13 ***
## loudness_sqr      -0.0120165   0.0068539 -1.7533  0.079597 .  
## tempo             -0.0064336   0.0059906 -1.0739  0.282880    
## time_signature     0.8600519   0.4706325  1.8274  0.067671 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Wooldridge's serial-correlation test on `fe_full_attri`, then the same
# coefficients re-tested with an HC1 covariance matrix from vcovHC().
pwartest(fe_full_attri)
## 
##  Wooldridge's test for serial correlation in FE panels
## 
## data:  fe_full_attri
## F = 0.34746, df1 = 1, df2 = 9094, p-value = 0.5556
## alternative hypothesis: serial correlation
fe_full_attri.robust <- coeftest(
  fe_full_attri,
  vcov. = function(x) vcovHC(x, type = "HC1")
)
print(fe_full_attri.robust)
## 
## t test of coefficients:
## 
##                     Estimate  Std. Error t value  Pr(>|t|)    
## length_sec         0.0159194   0.0038500  4.1349 3.588e-05 ***
## danceability       4.9366297   1.3274056  3.7190 0.0002014 ***
## acousticness      -0.6620354   1.1028419 -0.6003 0.5483238    
## energy             3.6944469   1.2591876  2.9340 0.0033559 ** 
## instrumentalness -21.1919410   2.9293981 -7.2342 5.118e-13 ***
## liveness          -0.8209202   1.2096918 -0.6786 0.4973989    
## loudness_sqr      -0.0101767   0.0068541 -1.4848 0.1376470    
## speechiness       -6.6685201   1.4816300 -4.5008 6.866e-06 ***
## tempo             -0.0043808   0.0060782 -0.7207 0.4710930    
## time_signature     0.8878747   0.4658222  1.9060 0.0566813 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Wooldridge's serial-correlation test on `fe_full_list`, then the same
# coefficients re-tested with an HC1 covariance matrix from vcovHC().
pwartest(fe_full_list)
## 
##  Wooldridge's test for serial correlation in FE panels
## 
## data:  fe_full_list
## F = 58.015, df1 = 1, df2 = 113978, p-value = 2.621e-14
## alternative hypothesis: serial correlation
fe_full_list.robust <- coeftest(
  fe_full_list,
  vcov. = function(x) vcovHC(x, type = "HC1")
)
print(fe_full_list.robust)
## 
## t test of coefficients:
## 
##                     Estimate  Std. Error  t value  Pr(>|t|)    
## length_sec         0.0086538   0.0012901   6.7081 1.981e-11 ***
## danceability       1.1799206   0.5165671   2.2842 0.0223641 *  
## acousticness      -1.4312663   0.3822364  -3.7445 0.0001809 ***
## energy            -1.5132730   0.5532013  -2.7355 0.0062298 ** 
## instrumentalness -29.5212380   2.3766014 -12.4216 < 2.2e-16 ***
## liveness          -1.2200173   0.4771780  -2.5567 0.0105673 *  
## loudness_sqr      -0.0502589   0.0050629  -9.9269 < 2.2e-16 ***
## speechiness       -5.6202050   0.6574608  -8.5483 < 2.2e-16 ***
## tempo             -0.0086922   0.0022694  -3.8301 0.0001281 ***
## time_signature     1.1575203   0.1691942   6.8414 7.884e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

1.2 Plots

# Residual-vs-fitted scatter plots for the three fixed-effects fits.
# Fitted values are computed as the first column of the stored model frame
# (presumably the response) minus the stored residuals.

# -- fe: selected attributes --
fitted_fe <- fe$model[[1]] - fe$residuals
residplot <- setNames(
  as.data.frame(cbind(fitted_fe, fe$residuals)),
  c("Fitted", "Residual")
)
ggplot(residplot, aes(x = Fitted, y = Residual)) +
  geom_point(shape = 1, colour = "black") +
  geom_hline(yintercept = 0, colour = "red") +
  labs(title = "Unique song with selected attributes") +
  theme_light()

# -- fe_full_attri: full attribute set --
fitted_fe_full_attri <- fe_full_attri$model[[1]] - fe_full_attri$residuals
residplot <- setNames(
  as.data.frame(cbind(fitted_fe_full_attri, fe_full_attri$residuals)),
  c("Fitted", "Residual")
)
ggplot(residplot, aes(x = Fitted, y = Residual)) +
  geom_point(shape = 1, colour = "black") +
  geom_hline(yintercept = 0, colour = "red") +
  labs(title = "Unique song with full attributes") +
  theme_light()

# -- fe_full_list: full song list --
fitted_fe_full_list <- fe_full_list$model[[1]] - fe_full_list$residuals
residplot <- setNames(
  as.data.frame(cbind(fitted_fe_full_list, fe_full_list$residuals)),
  c("Fitted", "Residual")
)
ggplot(residplot, aes(x = Fitted, y = Residual)) +
  geom_point(shape = 1, colour = "black") +
  geom_hline(yintercept = 0, colour = "red") +
  labs(title = "Full song list") +
  theme_light()

1.3 Variance Inflation Factor

# Variance inflation factors for the three fixed-effects fits. Each print()
# labels the vif() output that follows it. The labels previously misspelled
# "attributes" and referred to `mean_df_*`, while the data frames used to fit
# the models are `main_df_rankInfo` and `main_df_selected`.
print("main_df_rankInfo optimised attributes for unique song")
## [1] "main_df_rankInfo optimised attributes for unique song"
vif(fe)
##       length_sec     danceability           energy instrumentalness 
##         1.017465         1.066047         1.568317         1.208277 
##     loudness_sqr            tempo   time_signature 
##         1.800642         1.041412         1.041869
print("main_df_rankInfo full attributes for unique song")
## [1] "main_df_rankInfo full attributes for unique song"
vif(fe_full_attri)
##       length_sec     danceability     acousticness           energy 
##         1.022787         1.190964         1.560590         1.990060 
## instrumentalness         liveness     loudness_sqr      speechiness 
##         1.213858         1.049802         1.828740         1.094289 
##            tempo   time_signature 
##         1.053149         1.043798
print("main_df_selected full list of songs")
## [1] "main_df_selected full list of songs"
vif(fe_full_list)
##       length_sec     danceability     acousticness           energy 
##         1.021225         1.187353         1.457298         2.079824 
## instrumentalness         liveness     loudness_sqr      speechiness 
##         1.165116         1.041341         1.905160         1.105029 
##            tempo   time_signature 
##         1.062801         1.028661

2.1 Random Effects Model

# Random-effects (model = "random") counterparts of the three specifications,
# all indexed by rank (individual) and date (time).
# NOTE(review): `effect` is not set in these calls, and the summaries below
# report "Oneway (individual) effect", whereas the summary of `fe_full_list`
# reports a "Twoways effects Within Model". Confirm the differing effect
# structures are intended before comparing the fits with phtest().

# main_df_rankInfo: selected attributes
re <- plm(
  popularity ~ length_sec + danceability + energy + instrumentalness +
    loudness_sqr + tempo + time_signature,
  data = main_df_rankInfo,
  model = "random",
  index = c("rank", "date")
)

# main_df_rankInfo: full attribute set
re_full_attri <- plm(
  popularity ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness + tempo +
    time_signature,
  data = main_df_rankInfo,
  model = "random",
  index = c("rank", "date")
)

# main_df_selected: full attribute set on the full song list
re_full_list <- plm(
  popularity ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness + tempo +
    time_signature,
  data = main_df_selected,
  model = "random",
  index = c("rank", "date")
)

summary(re)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = popularity ~ length_sec + danceability + energy + 
##     instrumentalness + loudness_sqr + tempo + time_signature, 
##     data = main_df_rankInfo, model = "random", index = c("rank", 
##         "date"))
## 
## Unbalanced Panel: n = 100, T = 1-528, N = 9196
## 
## Effects:
##                   var std.dev share
## idiosyncratic 273.006  16.523 0.962
## individual     10.711   3.273 0.038
## theta:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01906 0.53814 0.67239 0.62552 0.72877 0.78540 
## 
## Residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -69.590  -7.888   2.137  -0.377  10.612  50.788 
## 
## Coefficients:
##                     Estimate  Std. Error  z-value  Pr(>|z|)    
## (Intercept)       56.5350005   2.8149260  20.0840 < 2.2e-16 ***
## length_sec        -0.0143515   0.0034272  -4.1875 2.820e-05 ***
## danceability      10.0795178   1.1809223   8.5353 < 2.2e-16 ***
## energy            -3.8324923   1.2217279  -3.1369  0.001707 ** 
## instrumentalness -22.5972452   2.1065905 -10.7269 < 2.2e-16 ***
## loudness_sqr      -0.0218455   0.0042698  -5.1162 3.117e-07 ***
## tempo              0.0098792   0.0059807   1.6519  0.098565 .  
## time_signature     1.5807367   0.5977709   2.6444  0.008184 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    3444500
## Residual Sum of Squares: 2522500
## R-Squared:      0.27067
## Adj. R-Squared: 0.27011
## Chisq: 371.479 on 7 DF, p-value: < 2.22e-16
# Print the summary of the random-effects fit with the full attribute set
# (fitted on main_df_rankInfo).
summary(re_full_attri)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = popularity ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature, data = main_df_rankInfo, model = "random", 
##     index = c("rank", "date"))
## 
## Unbalanced Panel: n = 100, T = 1-528, N = 9196
## 
## Effects:
##                   var std.dev share
## idiosyncratic 272.972  16.522 0.968
## individual      9.043   3.007 0.032
## theta:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01616 0.50699 0.64696 0.59975 0.70683 0.76746 
## 
## Residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -69.629  -7.874   2.119  -0.378  10.596  50.498 
## 
## Coefficients:
##                     Estimate  Std. Error  z-value  Pr(>|z|)    
## (Intercept)       57.8748500   2.9723412  19.4711 < 2.2e-16 ***
## length_sec        -0.0145391   0.0034351  -4.2325 2.311e-05 ***
## danceability       9.3247191   1.2686483   7.3501 1.980e-13 ***
## acousticness      -1.2726428   0.9657063  -1.3178 0.1875585    
## energy            -4.6664229   1.3833650  -3.3732 0.0007429 ***
## instrumentalness -22.4710673   2.1130416 -10.6345 < 2.2e-16 ***
## liveness          -0.3267759   1.2700065  -0.2573 0.7969452    
## loudness_sqr      -0.0217286   0.0043038  -5.0487 4.449e-07 ***
## speechiness        1.7785377   1.6061823   1.1073 0.2681610    
## tempo              0.0086813   0.0060260   1.4406 0.1496875    
## time_signature     1.5501076   0.5986351   2.5894 0.0096143 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    3486900
## Residual Sum of Squares: 2525500
## R-Squared:      0.27892
## Adj. R-Squared: 0.27813
## Chisq: 374.573 on 10 DF, p-value: < 2.22e-16
# Print the summary of the random-effects fit on the full song list
# (fitted on main_df_selected).
summary(re_full_list)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = popularity ~ length_sec + danceability + acousticness + 
##     energy + instrumentalness + liveness + loudness_sqr + speechiness + 
##     tempo + time_signature, data = main_df_selected, model = "random", 
##     index = c("rank", "date"))
## 
## Unbalanced Panel: n = 100, T = 1138-1141, N = 114080
## 
## Effects:
##                   var std.dev share
## idiosyncratic 261.817  16.181 0.994
## individual      1.650   1.284 0.006
## theta:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6502  0.6506  0.6506  0.6505  0.6506  0.6506 
## 
## Residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -74.12   -6.85    2.71    0.00   10.58  130.58 
## 
## Coefficients:
##                     Estimate  Std. Error  z-value  Pr(>|z|)    
## (Intercept)       75.0871517   0.9419409  79.7154 < 2.2e-16 ***
## length_sec        -0.0318794   0.0010512 -30.3258 < 2.2e-16 ***
## danceability       4.4876618   0.3695323  12.1442 < 2.2e-16 ***
## acousticness      -1.4316852   0.2794763  -5.1227 3.011e-07 ***
## energy           -10.5176541   0.4125817 -25.4923 < 2.2e-16 ***
## instrumentalness -30.2397848   0.6436829 -46.9793 < 2.2e-16 ***
## liveness          -1.4707692   0.3603248  -4.0818 4.469e-05 ***
## loudness_sqr      -0.0709723   0.0016209 -43.7844 < 2.2e-16 ***
## speechiness       -2.2530698   0.4883897  -4.6133 3.964e-06 ***
## tempo              0.0049571   0.0017163   2.8882  0.003875 ** 
## time_signature     1.2378352   0.1930118   6.4133 1.424e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    32234000
## Residual Sum of Squares: 30099000
## R-Squared:      0.066237
## Adj. R-Squared: 0.066155
## Chisq: 8091.07 on 10 DF, p-value: < 2.22e-16
# Coefficient tests for `re` using an HC1 covariance matrix from vcovHC().
re.robust <- coeftest(re, vcov. = function(x) vcovHC(x, type = "HC1"))
print(re.robust)
## 
## t test of coefficients:
## 
##                     Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)       56.5350005   3.0073798 18.7988 < 2.2e-16 ***
## length_sec        -0.0143515   0.0043172 -3.3242 0.0008901 ***
## danceability      10.0795178   1.4050021  7.1740 7.846e-13 ***
## energy            -3.8324923   1.5981461 -2.3981 0.0165008 *  
## instrumentalness -22.5972452   3.6150225 -6.2509 4.262e-10 ***
## loudness_sqr      -0.0218455   0.0081625 -2.6763 0.0074565 ** 
## tempo              0.0098792   0.0063476  1.5564 0.1196577    
## time_signature     1.5807367   0.4995012  3.1646 0.0015579 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient tests for `re_full_attri` using an HC1 covariance matrix from
# vcovHC().
re_full_attri.robust <- coeftest(
  re_full_attri,
  vcov. = function(x) vcovHC(x, type = "HC1")
)
print(re_full_attri.robust)
## 
## t test of coefficients:
## 
##                     Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)       57.8748500   2.7851755 20.7796 < 2.2e-16 ***
## length_sec        -0.0145391   0.0042948 -3.3853 0.0007141 ***
## danceability       9.3247191   1.4371906  6.4882 9.138e-11 ***
## acousticness      -1.2726428   1.1445493 -1.1119 0.2662033    
## energy            -4.6664229   1.6161634 -2.8873 0.0038941 ** 
## instrumentalness -22.4710673   3.6493175 -6.1576 7.695e-10 ***
## liveness          -0.3267759   1.2425038 -0.2630 0.7925581    
## loudness_sqr      -0.0217286   0.0081831 -2.6553 0.0079377 ** 
## speechiness        1.7785377   1.6876111  1.0539 0.2919661    
## tempo              0.0086813   0.0063647  1.3640 0.1726077    
## time_signature     1.5501076   0.4947055  3.1334 0.0017334 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient tests for `re_full_list` using an HC1 covariance matrix from
# vcovHC().
re_full_list.robust <- coeftest(
  re_full_list,
  vcov. = function(x) vcovHC(x, type = "HC1")
)
print(re_full_list.robust)
## 
## t test of coefficients:
## 
##                     Estimate  Std. Error  t value  Pr(>|t|)    
## (Intercept)       75.0871517   1.3350626  56.2424 < 2.2e-16 ***
## length_sec        -0.0318794   0.0012764 -24.9763 < 2.2e-16 ***
## danceability       4.4876618   0.5238828   8.5662 < 2.2e-16 ***
## acousticness      -1.4316852   0.4390346  -3.2610  0.001111 ** 
## energy           -10.5176541   0.6263767 -16.7913 < 2.2e-16 ***
## instrumentalness -30.2397848   2.4494673 -12.3455 < 2.2e-16 ***
## liveness          -1.4707692   0.4986896  -2.9493  0.003186 ** 
## loudness_sqr      -0.0709723   0.0053857 -13.1779 < 2.2e-16 ***
## speechiness       -2.2530698   0.7894147  -2.8541  0.004317 ** 
## tempo              0.0049571   0.0024975   1.9848  0.047170 *  
## time_signature     1.2378352   0.1892345   6.5413 6.125e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.2 Plots

# Residual-vs-fitted scatter plots for the three random-effects fits.
# Fitted values are computed as the first column of the stored model frame
# (presumably the response) minus the stored residuals.

# -- re: selected attributes --
fitted_re <- re$model[[1]] - re$residuals
residplot <- setNames(
  as.data.frame(cbind(fitted_re, re$residuals)),
  c("Fitted", "Residual")
)
ggplot(residplot, aes(x = Fitted, y = Residual)) +
  geom_point(shape = 1, colour = "black") +
  geom_hline(yintercept = 0, colour = "red") +
  labs(title = "Unique song with selected attributes") +
  theme_light()

# -- re_full_attri: full attribute set --
fitted_re_full_attri <- re_full_attri$model[[1]] - re_full_attri$residuals
residplot <- setNames(
  as.data.frame(cbind(fitted_re_full_attri, re_full_attri$residuals)),
  c("Fitted", "Residual")
)
ggplot(residplot, aes(x = Fitted, y = Residual)) +
  geom_point(shape = 1, colour = "black") +
  geom_hline(yintercept = 0, colour = "red") +
  labs(title = "Unique song with full attributes") +
  theme_light()

# -- re_full_list: full song list --
fitted_re_full_list <- re_full_list$model[[1]] - re_full_list$residuals
residplot <- setNames(
  as.data.frame(cbind(fitted_re_full_list, re_full_list$residuals)),
  c("Fitted", "Residual")
)
ggplot(residplot, aes(x = Fitted, y = Residual)) +
  geom_point(shape = 1, colour = "black") +
  geom_hline(yintercept = 0, colour = "red") +
  labs(title = "Full song list") +
  theme_light()

2.3 Variance Inflation Factor

# Variance inflation factors for the three random-effects fits. Each print()
# labels the vif() output that follows it. The labels previously misspelled
# "attributes" and referred to `mean_df_*`, while the data frames used to fit
# the models are `main_df_rankInfo` and `main_df_selected`.
print("main_df_rankInfo optimised attributes for unique song")
## [1] "main_df_rankInfo optimised attributes for unique song"
vif(re)
##       length_sec     danceability           energy instrumentalness 
##         1.028404         1.086488         1.577463         1.204972 
##     loudness_sqr            tempo   time_signature 
##         1.811418         1.043060         1.043163
print("main_df_rankInfo full attributes for unique song")
## [1] "main_df_rankInfo full attributes for unique song"
vif(re_full_attri)
##       length_sec     danceability     acousticness           energy 
##         1.031891         1.252431         1.595145         2.020094 
## instrumentalness         liveness     loudness_sqr      speechiness 
##         1.210817         1.051848         1.837953         1.127034 
##            tempo   time_signature 
##         1.057674         1.044975
print("main_df_selected full list of songs")
## [1] "main_df_selected full list of songs"
vif(re_full_list)
##       length_sec     danceability     acousticness           energy 
##         1.022011         1.212215         1.472455         2.090232 
## instrumentalness         liveness     loudness_sqr      speechiness 
##         1.165333         1.043959         1.903327         1.113270 
##            tempo   time_signature 
##         1.063788         1.028335

2.5 Hausman Test

# Hausman test on the fitted FE and RE models (selected attributes).
phtest(fe, re)
## 
##  Hausman Test
## 
## data:  popularity ~ length_sec + danceability + energy + instrumentalness +  ...
## chisq = 1237.6, df = 7, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Robust, regression-based Hausman test (method = "aux") with an HC1 covariance.
# The formula interface builds its own panel from `data`, so the panel index is
# passed explicitly to match the fitted models. Without `index`, a plain data
# frame is indexed by its first two columns.
# NOTE(review): re-knit after this change; the statistic printed below was
# produced by the call without `index`.
phtest(
  popularity ~ length_sec + danceability + energy + instrumentalness +
    loudness_sqr + tempo + time_signature,
  data = main_df_rankInfo,
  index = c("rank", "date"),
  method = "aux",
  vcov = function(x) vcovHC(x, type = "HC1")
)
## 
##  Regression-based Hausman test, vcov: function(x) vcovHC(x, type =
##  "HC1")
## 
## data:  popularity ~ length_sec + danceability + energy + instrumentalness +     loudness_sqr + tempo + time_signature
## chisq = 193.77, df = 7, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Based on the results shown above, we reject the null hypothesis in both Hausman tests: the RE estimator is inconsistent here, so the FE estimator, which remains consistent, is preferred.

# Hausman test on the fitted FE and RE models (full attribute set).
phtest(fe_full_attri, re_full_attri)
## 
##  Hausman Test
## 
## data:  popularity ~ length_sec + danceability + acousticness + energy +  ...
## chisq = 1071.9, df = 10, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Robust, regression-based Hausman test (method = "aux") with an HC1 covariance.
# The formula interface builds its own panel from `data`, so the panel index is
# passed explicitly to match the fitted models. Without `index`, a plain data
# frame is indexed by its first two columns.
# NOTE(review): re-knit after this change; the statistic printed below was
# produced by the call without `index`.
phtest(
  popularity ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness + tempo +
    time_signature,
  data = main_df_rankInfo,
  index = c("rank", "date"),
  method = "aux",
  vcov = function(x) vcovHC(x, type = "HC1")
)
## 
##  Regression-based Hausman test, vcov: function(x) vcovHC(x, type =
##  "HC1")
## 
## data:  popularity ~ length_sec + danceability + acousticness + energy +     instrumentalness + liveness + loudness_sqr + speechiness +     tempo + time_signature
## chisq = 227.73, df = 10, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Based on the results shown above, we reject the null hypothesis in both Hausman tests: the RE estimator is inconsistent here, so the FE estimator, which remains consistent, is preferred.

# Hausman test on the fitted FE and RE models (full song list).
phtest(fe_full_list, re_full_list)
## 
##  Hausman Test
## 
## data:  popularity ~ length_sec + danceability + acousticness + energy +  ...
## chisq = 2560935, df = 10, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Robust, regression-based Hausman test (method = "aux") with an HC1 covariance.
# The formula interface builds its own panel from `data`, so the panel index is
# passed explicitly to match the fitted models. Without `index`, a plain data
# frame is indexed by its first two columns.
# NOTE(review): re-knit after this change; the statistic printed below was
# produced by the call without `index`.
phtest(
  popularity ~ length_sec + danceability + acousticness + energy +
    instrumentalness + liveness + loudness_sqr + speechiness + tempo +
    time_signature,
  data = main_df_selected,
  index = c("rank", "date"),
  method = "aux",
  vcov = function(x) vcovHC(x, type = "HC1")
)
## 
##  Regression-based Hausman test, vcov: function(x) vcovHC(x, type =
##  "HC1")
## 
## data:  popularity ~ length_sec + danceability + acousticness + energy +     instrumentalness + liveness + loudness_sqr + speechiness +     tempo + time_signature
## chisq = 4834.6, df = 10, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Based on the results shown above, we reject the null hypothesis in both Hausman tests: the RE estimator is inconsistent here, so the FE estimator, which remains consistent, is preferred.

Remove rows containing NA values before 2SLS testing - 20 records

# Keep only the complete rows of main_df_selected for the 2SLS section.
# The original piped the data frame into na.omit() and also passed it again as
# a second argument; that extra argument was silently absorbed by `...`.
main_df_selected2 <- na.omit(main_df_selected)

2SLS for Causality - Tempo VS Danceability -> Popularity

# Baseline OLS of popularity on the audio features. `tempo` and `loudness_sqr`
# are deliberately absent from this regressor list: they serve as instruments
# for `danceability` in the 2SLS fits later in this section.
rhs <- c(
  "length_sec", "danceability", "acousticness", "energy",
  "instrumentalness", "liveness", "speechiness", "time_signature"
)

fmla.ols <- reformulate(rhs, response = "popularity")
fit.ols <- lm(fmla.ols, data = main_df_selected2)

summary(fit.ols)
## 
## Call:
## lm(formula = fmla.ols, data = main_df_selected2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -70.566  -7.555   2.786  11.278  55.065 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       62.13945    0.89600  69.352  < 2e-16 ***
## length_sec        -0.02831    0.00109 -25.976  < 2e-16 ***
## danceability       8.63740    0.37275  23.172  < 2e-16 ***
## acousticness      -2.80526    0.28882  -9.713  < 2e-16 ***
## energy            -1.74827    0.36131  -4.839 1.31e-06 ***
## instrumentalness -40.47728    0.62298 -64.974  < 2e-16 ***
## liveness          -1.75818    0.37421  -4.698 2.63e-06 ***
## speechiness       -4.77539    0.50214  -9.510  < 2e-16 ***
## time_signature     1.69139    0.20036   8.442  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.88 on 114071 degrees of freedom
## Multiple R-squared:  0.05059,    Adjusted R-squared:  0.05053 
## F-statistic: 759.9 on 8 and 114071 DF,  p-value: < 2.2e-16
# 2SLS with `tempo` as the instrument for `danceability`.
# The formula is written as "outcome ~ regressors | instruments": the
# instrument side is `rhs` without its second entry (`danceability`), plus
# `tempo`.
rhs.1stage <- c(rhs[-2], "tempo")
rhs.2stage <- rhs
fmla.1stage <- paste(rhs.1stage, collapse = " + ")
fmla.2stage <- paste0("popularity ~ ", paste(rhs.2stage, collapse = " + "))
fmla.2sls <- as.formula(paste(fmla.2stage, fmla.1stage, sep = " | "))

fit.2sls <- ivreg(fmla.2sls, data = main_df_selected2)
summary(fit.2sls)
## 
## Call:
## ivreg(formula = fmla.2sls, data = main_df_selected2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -70.227  -7.589   2.855  11.380  55.658 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       65.097144   1.433891  45.399  < 2e-16 ***
## length_sec        -0.029979   0.001261 -23.772  < 2e-16 ***
## danceability       3.761494   1.881966   1.999 0.045643 *  
## acousticness      -3.530845   0.398615  -8.858  < 2e-16 ***
## energy            -2.530662   0.467279  -5.416 6.12e-08 ***
## instrumentalness -40.521310   0.623670 -64.972  < 2e-16 ***
## liveness          -2.201710   0.410363  -5.365 8.10e-08 ***
## speechiness       -3.103073   0.807954  -3.841 0.000123 ***
## time_signature     1.971240   0.226741   8.694  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.89 on 114071 degrees of freedom
## Multiple R-Squared: 0.04917, Adjusted R-squared: 0.0491 
## Wald test: 692.2 on 8 and 114071 DF,  p-value: < 2.2e-16
# Side-by-side table of coefficients and standard errors for the OLS fit and
# the one-instrument 2SLS fit.
car::compareCoefs(fit.ols, fit.2sls)
## Calls:
## 1: lm(formula = fmla.ols, data = main_df_selected2)
## 2: ivreg(formula = fmla.2sls, data = main_df_selected2)
## 
##                   Model 1  Model 2
## (Intercept)        62.139   65.097
## SE                  0.896    1.434
##                                   
## length_sec       -0.02830 -0.02998
## SE                0.00109  0.00126
##                                   
## danceability        8.637    3.761
## SE                  0.373    1.882
##                                   
## acousticness       -2.805   -3.531
## SE                  0.289    0.399
##                                   
## energy             -1.748   -2.531
## SE                  0.361    0.467
##                                   
## instrumentalness  -40.477  -40.521
## SE                  0.623    0.624
##                                   
## liveness           -1.758   -2.202
## SE                  0.374    0.410
##                                   
## speechiness        -4.775   -3.103
## SE                  0.502    0.808
##                                   
## time_signature      1.691    1.971
## SE                  0.200    0.227
## 

Running 2SLS with “Two Stage of OLS”

The result of 2SLS should be similar to that from running stages 1 and 2 with OLS, since this is how 2SLS works. The only difference is that the standard errors from "manually" running two stages of OLS are incorrect, and we should refer to ivreg for the correct computation of standard errors.

# Manual first stage: regress `danceability` on the first-stage regressor list
# (`rhs.1stage`, which includes the instrument `tempo`).
fmla.ols1 <- reformulate(rhs.1stage, response = "danceability")
fit.1stage <- lm(fmla.ols1, data = main_df_selected2)

summary(fit.1stage)
## 
## Call:
## lm(formula = fmla.ols1, data = main_df_selected2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56205 -0.08526  0.00609  0.09333  0.38377 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       7.273e-01  6.978e-03 104.236   <2e-16 ***
## length_sec       -3.598e-04  8.428e-06 -42.684   <2e-16 ***
## acousticness     -1.523e-01  2.207e-03 -69.000   <2e-16 ***
## energy           -1.417e-01  2.788e-03 -50.821   <2e-16 ***
## instrumentalness  2.219e-03  4.853e-03   0.457    0.648    
## liveness         -8.701e-02  2.902e-03 -29.982   <2e-16 ***
## speechiness       3.538e-01  3.784e-03  93.505   <2e-16 ***
## time_signature    5.274e-02  1.552e-03  33.972   <2e-16 ***
## tempo            -9.290e-04  1.360e-05 -68.300   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1314 on 114071 degrees of freedom
## Multiple R-squared:  0.1725, Adjusted R-squared:  0.1725 
## F-statistic:  2973 on 8 and 114071 DF,  p-value: < 2.2e-16
# Manual second stage: swap the endogenous regressor `danceability` for its
# first-stage fitted values and keep every other second-stage regressor.
main_df_selected2$fitted.danceability <- fitted(fit.1stage)

# Drop `danceability` by name. The previous `rhs[-1]` removed the first entry
# (`length_sec`) instead, so the regression contained both `danceability` and
# its fitted values and omitted `length_sec`.
rhs.ols2 <- c("fitted.danceability", setdiff(rhs, "danceability"))
fmla.ols2 <- reformulate(rhs.ols2, "popularity")

# NOTE(review): re-knit after this fix; the coefficient table printed after
# this call was produced with the old regressor list.
fit.2stage <- lm(fmla.ols2, data = main_df_selected2)
summary(fit.2stage)
## 
## Call:
## lm(formula = fmla.ols2, data = main_df_selected2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -69.067  -7.710   2.844  11.382  53.328 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          45.7184     1.1816  38.691  < 2e-16 ***
## fitted.danceability  17.8520     1.6631  10.734  < 2e-16 ***
## danceability          8.8368     0.3812  23.180  < 2e-16 ***
## acousticness          0.1583     0.3678   0.430  0.66691    
## energy                1.5240     0.4358   3.497  0.00047 ***
## instrumentalness    -40.2981     0.6247 -64.512  < 2e-16 ***
## liveness             -0.2229     0.4025  -0.554  0.57970    
## speechiness         -10.6630     0.7440 -14.332  < 2e-16 ***
## time_signature        0.7002     0.2207   3.172  0.00151 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.92 on 114071 degrees of freedom
## Multiple R-squared:  0.04594,    Adjusted R-squared:  0.04587 
## F-statistic: 686.6 on 8 and 114071 DF,  p-value: < 2.2e-16
# Spearman rank correlation between `danceability` and the instrument `tempo`.
cor.test(main_df_selected2$danceability, main_df_selected2$tempo, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  main_df_selected2$danceability and main_df_selected2$tempo
## S = 2.8668e+14, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1585566
# Two-instrument 2SLS formula: same second stage as before, with `danceability`
# now instrumented by both `tempo` and `loudness_sqr`.
rhs.1stage.2iv <- c(rhs[-2], "tempo", "loudness_sqr")
fmla.1stage.2iv <- paste(rhs.1stage.2iv, collapse = " + ")

# NOTE(review): this re-binds `fmla.2sls` (earlier the one-instrument formula
# object) to a character string. Confirm nothing later relies on the old value.
fmla.2sls <- paste(fmla.2stage, fmla.1stage.2iv, sep = " | ")
fmla.2sls.2iv <- as.formula(fmla.2sls)

# Show the "second stage | instruments" structure of the formula.
print(fmla.2sls.2iv)
## popularity ~ length_sec + danceability + acousticness + energy + 
##     instrumentalness + liveness + speechiness + time_signature | 
##     length_sec + acousticness + energy + instrumentalness + liveness + 
##         speechiness + time_signature + tempo + loudness_sqr
# Run 2SLS with both `tempo` and `loudness_sqr` as instruments for
# `danceability`.
fit.2sls.2iv <- ivreg(formula = fmla.2sls.2iv, data = main_df_selected2)

summary(fit.2sls.2iv)
## 
## Call:
## ivreg(formula = fmla.2sls.2iv, data = main_df_selected2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -74.488  -7.906   2.701  11.436  58.626 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       46.180981   1.390121  33.221  < 2e-16 ***
## length_sec        -0.019270   0.001261 -15.282  < 2e-16 ***
## danceability      34.945754   1.766274  19.785  < 2e-16 ***
## acousticness       1.109705   0.391070   2.838  0.00455 ** 
## energy             2.473193   0.461342   5.361  8.3e-08 ***
## instrumentalness -40.239728   0.636629 -63.208  < 2e-16 ***
## liveness           0.634946   0.413233   1.537  0.12441    
## speechiness      -13.798527   0.782997 -17.623  < 2e-16 ***
## time_signature     0.181406   0.227365   0.798  0.42495    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.25 on 114071 degrees of freedom
## Multiple R-Squared: 0.009133,    Adjusted R-squared: 0.009063 
## Wald test: 712.7 on 8 and 114071 DF,  p-value: < 2.2e-16
# First stage for the two-instrument case: regress `danceability` on the
# exogenous regressors plus `tempo` and `loudness_sqr` to gauge how relevant
# the instruments are.
fmla.ols1.2iv <- reformulate(rhs.1stage.2iv, response = "danceability")
print(fmla.ols1.2iv)
## danceability ~ length_sec + acousticness + energy + instrumentalness + 
##     liveness + speechiness + time_signature + tempo + loudness_sqr
fit.ols1.2iv <- lm(fmla.ols1.2iv, data = main_df_selected2)
summary(fit.ols1.2iv)
## 
## Call:
## lm(formula = fmla.ols1.2iv, data = main_df_selected2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58757 -0.08483  0.00653  0.09297  0.39651 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       7.766e-01  7.152e-03  108.59   <2e-16 ***
## length_sec       -3.616e-04  8.397e-06  -43.06   <2e-16 ***
## acousticness     -1.461e-01  2.209e-03  -66.12   <2e-16 ***
## energy           -1.924e-01  3.272e-03  -58.81   <2e-16 ***
## instrumentalness  5.692e-02  5.182e-03   10.98   <2e-16 ***
## liveness         -8.434e-02  2.893e-03  -29.16   <2e-16 ***
## speechiness       3.635e-01  3.784e-03   96.05   <2e-16 ***
## time_signature    5.234e-02  1.547e-03   33.84   <2e-16 ***
## tempo            -9.283e-04  1.355e-05  -68.51   <2e-16 ***
## loudness_sqr     -3.815e-04  1.300e-05  -29.34   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1309 on 114070 degrees of freedom
## Multiple R-squared:  0.1787, Adjusted R-squared:  0.1786 
## F-statistic:  2758 on 9 and 114070 DF,  p-value: < 2.2e-16
# Joint F-test of the restrictions `tempo = 0` and `loudness_sqr = 0` in the
# first-stage regression, i.e. a joint significance test on the two
# instruments (not a Chow structural-break test).
linearHypothesis(fit.ols1.2iv, c("tempo = 0", "loudness_sqr = 0"))
Res.Df RSS Df Sum of Sq F Pr(>F)
114072 2051.123 NA NA NA NA
114070 1955.785 2 95.33808 2780.269 0